<img src="https://numpy.org/images/logos/numpy.svg" alt="NumPy logo" width="50" height="50">

The fundamental package for scientific computing with Python

https://numpy.org/

* powerful N-dim array object

* broadcasting functions

* Linear algebra, matrix manipulation, Fourier transforms

In [1]:
!pip install numpy



In [2]:
import numpy as np

### Numpy Arrays

* Vector - 1D array


* Matrix - 2D array

In [3]:
mylist = [1,2,3]

In [5]:
type(mylist)

list

In [6]:
type(np.array(mylist))

numpy.ndarray

In [7]:
mymatrix = [[1,2,3],[4,5,6],[7,8,9]]
mymatrix

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [8]:
type(mymatrix)

list

In [9]:
type(np.array(mymatrix))

numpy.ndarray

In [10]:
np.array(mymatrix)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

## Built In Methods

#### arange -
Returns evenly spaced values within the half-open interval [start, stop), with an optional step size

In [11]:
np.arange(0,10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
np.arange(0,10,2)

array([0, 2, 4, 6, 8])

#### Zeros and Ones

In [13]:
np.zeros(3)

array([0., 0., 0.])

In [14]:
np.ones(3)

array([1., 1., 1.])

In [15]:
np.zeros((4,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [16]:
np.ones((4,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

#### Linspace -

Returns a requested number of evenly spaced values over a specified interval; both endpoints are included by default

In [20]:
np.linspace(0,10,5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

#### Identity Matrix

In [21]:
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

### Random Number Generation

#### rand

Creates an array of the given shape and fills it with random samples from a uniform distribution over [0, 1)

In [23]:
np.random.rand(2)

array([0.60646952, 0.48400759])

#### randn
Returns samples from the standard normal distribution (mean 0, variance 1)

In [24]:
np.random.randn(4)

array([ 1.16581461, -0.3645305 ,  1.02588802,  0.38129504])

In [26]:
np.random.randn(5,5)

array([[ 0.04504038,  0.26234797,  0.87563777,  0.62370201,  1.85319986],
       [ 0.23401525,  0.82444257,  0.5612748 , -0.94721164, -0.51689866],
       [ 1.01065937,  0.64561756, -1.11634715,  2.03449087,  0.21399304],
       [ 0.09878095,  0.27902561, -0.99533311,  0.98662053,  0.14694491],
       [-0.26181701, -0.46646171, -0.99481831, -0.54899662,  0.85076694]])

#### randint

In [30]:
np.random.randint(1,100,10)

array([89, 80, 79, 28, 62,  2, 50, 94, 67, 42])

### Array attributes and methods

In [32]:
arr = np.arange(25)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [34]:
ranarr = np.random.randint(0,50,10)
ranarr

array([13, 33,  3, 37,  1,  9,  1, 36, 47, 20])

#### Reshape

In [39]:
arr.reshape(5,5)  # result is not assigned, so arr keeps its original shape; append .shape to inspect the new shape (5, 5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [37]:
arr.shape

(25,)

#### max, min, argmax, argmin

In [40]:
ranarr

array([13, 33,  3, 37,  1,  9,  1, 36, 47, 20])

In [3]:
import numpy as np

arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [5]:
arr[1:5]

array([2, 3, 4, 5])

In [6]:
mat = np.arange(36)

In [7]:
mat

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35])

In [9]:
mat = mat.reshape(6,6)
mat

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [10]:
mat[:2]

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [13]:
mat[:2,1:3]

array([[1, 2],
       [7, 8]])

In [15]:
mat[:3, 2:5]

array([[ 2,  3,  4],
       [ 8,  9, 10],
       [14, 15, 16]])

In [17]:
mat[2:, 3:]

array([[15, 16, 17],
       [21, 22, 23],
       [27, 28, 29],
       [33, 34, 35]])

### Selection

In [18]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [19]:
arr>4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [20]:
arr[arr>4]

array([ 5,  6,  7,  8,  9, 10])

## Pandas

### Series

A pandas Series is similar to a NumPy array.

Differences between a pandas Series and a NumPy array:

1) A Series can store values of any data type
2) In an ndarray, values are selected by their numeric position; in a Series, the index labels can be numeric or strings

In [23]:
import numpy as np
import pandas as pd

### Create a series

You can convert a list, a NumPy array, or a dictionary into a Series

In [24]:
labels = ['a', 'b', 'c'] # index labels (list of strings)
my_list = [10,20,30] # list of numbers
arr = np.array([10,20,30]) # numpy array holding the same values as my_list
mydict = {'a':10, 'b':'20', 'c':30} # dictionary; note 'b' maps to the string '20', not a number (presumably deliberate, to show a Series holding mixed types)

In [26]:
pd.Series(my_list)

0    10
1    20
2    30
dtype: int64

In [29]:
pd.Series(data=my_list,index=labels)

a    10
b    20
c    30
dtype: int64

In [30]:
pd.Series(arr)

0    10
1    20
2    30
dtype: int32

In [31]:
pd.Series(arr, index=labels)

a    10
b    20
c    30
dtype: int32

In [35]:
al = pd.Series(mydict)

In [37]:
type(al['a'])

int

In [38]:
type(al['b'])

str

In [39]:
pd.Series([sum,len,print,type])

0      <built-in function sum>
1      <built-in function len>
2    <built-in function print>
3               <class 'type'>
dtype: object

In [41]:
ser1 = pd.Series([1,2,3,4, 5], index=['AP', 'Haryana', 'Mumbai', 'Chennai', 'Bidhanagar'])

In [42]:
ser1

AP            1
Haryana       2
Mumbai        3
Chennai       4
Bidhanagar    5
dtype: int64

In [43]:
ser2 = pd.Series([10,20,30,40, 50], index=['AP', 'Haryana', 'Mumbai', 'Chennai', 'Vadodara'])

In [44]:
ser2

AP          10
Haryana     20
Mumbai      30
Chennai     40
Vadodara    50
dtype: int64

In [45]:
ser1 + ser2

AP            11.0
Bidhanagar     NaN
Chennai       44.0
Haryana       22.0
Mumbai        33.0
Vadodara       NaN
dtype: float64

In [46]:
ser1

AP            1
Haryana       2
Mumbai        3
Chennai       4
Bidhanagar    5
dtype: int64

In [47]:
ser2

AP          10
Haryana     20
Mumbai      30
Chennai     40
Vadodara    50
dtype: int64

### Dataframe

In [52]:
df = pd.DataFrame(np.random.randn(5,4), 
                  index='A B C D E'.split(), 
                  columns='W X Y Z'.split())

In [53]:
df

Unnamed: 0,W,X,Y,Z
A,0.224458,-1.532785,1.747703,-1.192762
B,0.63778,0.361488,-0.547502,1.371522
C,-0.540187,0.337693,-0.694258,-0.667927
D,-0.564645,-1.332626,-0.445426,-0.178894
E,0.611428,0.960352,0.001259,-0.309769


In [56]:
df = pd.DataFrame(np.random.randn(5,4), 
                  index=['A','B','C','D','E'], 
                  columns='W X Y Z'.split())

In [57]:
df['W']

A   -0.576123
B   -1.717182
C   -0.670599
D    1.485609
E    0.178179
Name: W, dtype: float64

In [58]:
df[:2]

Unnamed: 0,W,X,Y,Z
A,-0.576123,-1.100938,0.04141,0.985205
B,-1.717182,-0.232594,-0.069395,0.003701


In [67]:
df['new'] = df['W'] + df['Z']

In [68]:
df

Unnamed: 0,W,X,Y,Z,new
A,-0.576123,-1.100938,0.04141,0.985205,0.409082
B,-1.717182,-0.232594,-0.069395,0.003701,-1.713481
C,-0.670599,1.872306,-1.115633,-0.278952,-0.949551
D,1.485609,-1.471508,0.87786,0.098105,1.583714
E,0.178179,0.485091,-0.028882,1.894045,2.072224


In [70]:
# drop returns a new DataFrame; the result is not assigned here, so df keeps 'new'
df.drop(columns=['new'])

Unnamed: 0,W,X,Y,Z
A,-0.576123,-1.100938,0.04141,0.985205
B,-1.717182,-0.232594,-0.069395,0.003701
C,-0.670599,1.872306,-1.115633,-0.278952
D,1.485609,-1.471508,0.87786,0.098105
E,0.178179,0.485091,-0.028882,1.894045


In [78]:
df.loc['A','Y'] # label-based lookup: row label 'A', column label 'Y'

0.04140953236502946

In [79]:
df.iloc[0] # position-based lookup: the row at integer position 0

W     -0.576123
X     -1.100938
Y      0.041410
Z      0.985205
new    0.409082
Name: A, dtype: float64

### Selecting a subset of rows and columns

In [81]:
df.loc[['B', 'C'], ['W', 'Z']]

Unnamed: 0,W,Z
B,-1.717182,0.003701
C,-0.670599,-0.278952


In [82]:
df[df>0]

Unnamed: 0,W,X,Y,Z,new
A,,,0.04141,0.985205,0.409082
B,,,,0.003701,
C,,1.872306,,,
D,1.485609,,0.87786,0.098105,1.583714
E,0.178179,0.485091,,1.894045,2.072224


In [84]:
df[df['W']> 0]

Unnamed: 0,W,X,Y,Z,new
D,1.485609,-1.471508,0.87786,0.098105,1.583714
E,0.178179,0.485091,-0.028882,1.894045,2.072224


In [87]:
# One .loc call takes the boolean row mask and the column list together,
# avoiding chained indexing (df[...][...]), which builds an intermediate frame.
df.loc[df['W'] > 0, ['Y', 'X']]

Unnamed: 0,Y,X
D,0.87786,-1.471508
E,-0.028882,0.485091


In [94]:
df[(df['W']> 0) & (df['Z'] > 1)]

Unnamed: 0,W,X,Y,Z,new
E,0.178179,0.485091,-0.028882,1.894045,2.072224
