## Numpy 

In [2]:
import numpy as np

### indexing

In [5]:
array_a = np.array([[2,3,5],[1,4,2]])
array_a

array([[2, 3, 5],
       [1, 4, 2]])

In [4]:
type(array_a)

numpy.ndarray

In [6]:
array_a[0]

array([2, 3, 5])

In [7]:
array_a[0][2]

5

In [8]:
array_a[1][2]

2

In [10]:
array_a[:,1:]

array([[3, 5],
       [4, 2]])

In [11]:
array_a[-1]

array([1, 4, 2])

In [12]:
array_a[0,1]= 4

In [13]:
array_a

array([[2, 4, 5],
       [1, 4, 2]])

In [14]:
array_a[0]=4

In [15]:
array_a

array([[4, 4, 4],
       [1, 4, 2]])

In [16]:
array_a[:,0 ]=10

In [17]:
array_a

array([[10,  4,  4],
       [10,  4,  2]])

## Supported data types

In [7]:
import numpy as np

In [8]:
array_b = np.array([[1,2,4],[4,5,7]], dtype='float')
array_b

array([[1., 2., 4.],
       [4., 5., 7.]])

In [9]:
array_b = np.array([[1,2,4],[4,5,7]], dtype=np.float16)
array_b

array([[1., 2., 4.],
       [4., 5., 7.]], dtype=float16)

In [11]:
array_b = np.array([[1,2,4],[4,5,7]], dtype=np.complex64)
array_b

array([[1.+0.j, 2.+0.j, 4.+0.j],
       [4.+0.j, 5.+0.j, 7.+0.j]], dtype=complex64)

In [12]:
# Use the builtin `bool`: the `np.bool` alias was deprecated and later removed from NumPy.
# Every non-zero integer converts to True.
array_b = np.array([[1,2,4],[4,5,7]], dtype=bool)
array_b

array([[ True,  True,  True],
       [ True,  True,  True]])

In [13]:
# Use the builtin `str`: the `np.str` alias was deprecated and later removed from NumPy.
# Each integer is converted to its text form.
array_b = np.array([[1,2,4],[4,5,7]], dtype=str)
array_b

array([['1', '2', '4'],
       ['4', '5', '7']], dtype='<U1')

## Broadcasting

Broadcasting rules — two arrays can be combined element-wise when one of the following holds:

1. The arrays have the same shape.
2. The arrays have the same number of dimensions, and in each dimension the lengths are either equal or one of them is 1.
3. An array with too few dimensions can have its shape padded with dimensions of length 1 so that the second rule is satisfied.

In [14]:
array_a = np.array([1,2,3])
array_a

array([1, 2, 3])

In [15]:
array_b = np.array([[1],[2]])
array_b

array([[1],
       [2]])

In [16]:
matrix_c = np.array([[1,2,3],[4,5,6]])
matrix_c

array([[1, 2, 3],
       [4, 5, 6]])

In [17]:
np.add(array_a,matrix_c)

array([[2, 4, 6],
       [5, 7, 9]])

In [18]:
np.add(array_b,matrix_c)

array([[2, 3, 4],
       [6, 7, 8]])

### Type casting

In [20]:
np.add(array_b, matrix_c , dtype=np.float64)

array([[2., 3., 4.],
       [6., 7., 8.]])

In [21]:
# Builtin `str` replaces the removed `np.str` alias. The request itself is still expected to
# fail: the recorded output shows np.add has no loop producing a string result for these inputs.
np.add(array_b, matrix_c , dtype=str)

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U'), dtype('<U')) -> dtype('<U')

### Running over an Axis

In [25]:
np.mean(matrix_c,axis=0)  # axis = 0 , finding the mean for each column of the array

array([2.5, 3.5, 4.5])

In [23]:
matrix_c

array([[1, 2, 3],
       [4, 5, 6]])

In [26]:
np.mean(matrix_c,axis=1)  # axis = 1 , finding the mean for each row of the array

array([2., 5.])

### ndarrays

In [27]:
#  ndarray = N-Dimensional array
# N > Natural Numbers 

array_a = np.array([1,2,3])

In [28]:
array_a

array([1, 2, 3])

In [29]:
type(array_a)

numpy.ndarray

In [30]:
print(array_a)

[1 2 3]


In [31]:
array_a.shape

(3,)

In [33]:
array_b = np.array([[7,8,9],[10,11,12]])
array_b

array([[ 7,  8,  9],
       [10, 11, 12]])

In [34]:
array_b.shape

(2, 3)

In [35]:
type(array_b)

numpy.ndarray

In [36]:
print(array_b)

[[ 7  8  9]
 [10 11 12]]


In [37]:
array_c = np.array(13)

In [38]:
type(array_c)

numpy.ndarray

In [39]:
print(array_c)

13


In [40]:
array_c.shape

()

In [41]:
array_d = np.array([13])


In [42]:
type(array_d)

numpy.ndarray

In [43]:
array_d.shape

(1,)

In [44]:
print(array_d)

[13]


### List vs Array

In [45]:
list_a = [1,2,4,5,6,7]

In [46]:
len(list_a)

6

In [47]:
list_a = [[1,2,4],[5,6,7]]


In [48]:
len(list_a)

2

In [50]:
type(list_a)

list

In [51]:
array_a = np.array(list_a)

In [52]:
array_a

array([[1, 2, 4],
       [5, 6, 7]])

In [53]:
type(array_a)

numpy.ndarray

In [54]:
print(list_a)

[[1, 2, 4], [5, 6, 7]]


In [55]:
print(array_a)

[[1 2 4]
 [5 6 7]]


In [56]:
# Lists report their size with len(); arrays expose it through the .shape attribute
array_a.shape

(2, 3)

In [57]:
list_a.shape

AttributeError: 'list' object has no attribute 'shape'

In [59]:
len(list_a)

2

In [62]:
list_b = list_a[0] + list_a[1]
array_b = array_a[0]+array_a[1]

In [63]:
print(list_b)

[1, 2, 4, 5, 6, 7]


In [64]:
print(array_b)

[ 6  8 11]


In [65]:
np.sqrt(array_a)

array([[1.        , 1.41421356, 2.        ],
       [2.23606798, 2.44948974, 2.64575131]])

### String vs Object vs Numbers

In [74]:
# dtype=str (the builtin; the `np.str` alias has been removed from NumPy) reads every field
# as text, so the header row and the non-numeric columns are loaded intact.
lending_co_lt = np.genfromtxt('data/lending-co-LT.csv', delimiter=',', dtype=str)

In [75]:
lending_co_lt

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

In [76]:
print(lending_co_lt)

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


## Slicing

### Conditional Slicing

In [77]:
matrix_c = np.array([[1,1,0,3,5],[2,2,5,3,1],[2,1,0,8,6]])
matrix_c

array([[1, 1, 0, 3, 5],
       [2, 2, 5, 3, 1],
       [2, 1, 0, 8, 6]])

In [79]:
matrix_c[:,0]

array([1, 2, 2])

In [81]:
matrix_c[:,0]>1

array([False,  True,  True])

In [95]:
matrix_c[matrix_c[:,0]>1]

array([[2, 2, 5, 3, 1],
       [2, 1, 0, 8, 6]])

In [96]:
# Boolean mask over the whole matrix: keeps the even entries, flattened to 1-D
matrix_c[matrix_c % 2 == 0]

array([0, 2, 2, 2, 0, 8, 6])

In [97]:
# Element-wise AND of two masks: entries that are even and at most 4
matrix_c[(matrix_c % 2 == 0) & (matrix_c <= 4)]

array([0, 2, 2, 2, 0])

In [98]:
# Element-wise OR of two masks: entries that are even or at most 4
matrix_c[(matrix_c % 2 == 0) | (matrix_c <= 4)]

array([1, 1, 0, 3, 2, 2, 3, 1, 2, 1, 0, 8, 6])

### Dimensions and the Squeeze Function

In [99]:
matrix_D = np.array([[3,4,6,1,2],[7,5,8,9,0],[4,6,2,1,0]])
matrix_D

array([[3, 4, 6, 1, 2],
       [7, 5, 8, 9, 0],
       [4, 6, 2, 1, 0]])

In [100]:
type(matrix_D)

numpy.ndarray

In [101]:
type(matrix_D[0,0])

numpy.int32

In [105]:
matrix_D[0,0:1]

array([3])

In [102]:
type(matrix_D[0,0:1])

numpy.ndarray

In [106]:
matrix_D[0:1,0:1]

array([[3]])

In [107]:
matrix_D[0:1,0:1].shape

(1, 1)

In [109]:
print(matrix_D[0,0].shape) #scalar
print(matrix_D[0,0:1].shape) #vector
print(matrix_D[0:1,0:1].shape) # Matrix

()
(1,)
(1, 1)


In [113]:
matrix_D[0:1,0:1].squeeze()

array(3)

In [127]:
print(matrix_D[0:1,0:1].squeeze())

3


## Generating Data with numpy

In [114]:
import numpy as np

### np.empty(), np.zeros(), np.ones(), np.full()

In [115]:
# np.empty allocates the array without initializing its entries, so the values it
# shows are whatever happened to be in memory and carry no meaning.
array_empty = np.empty(shape = (2,3))

In [117]:
array_empty

array([[1.        , 1.41421356, 2.        ],
       [2.23606798, 2.44948974, 2.64575131]])

In [118]:
array_0s = np.zeros(shape=(2,3))
array_0s

array([[0., 0., 0.],
       [0., 0., 0.]])

In [120]:
array_0s = np.zeros(shape=(2,3), dtype = np.int8)
array_0s

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [122]:
array_1s = np.ones(shape=(2,3), dtype=int)
array_1s

array([[1, 1, 1],
       [1, 1, 1]])

In [124]:
array_full = np.full(shape = (2,3),fill_value=2)
array_full

array([[2, 2, 2],
       [2, 2, 2]])

### "_like" functions

In [125]:
matrix_A = np.array([[3,2,1,6,7],[3,6,8,2,1],[7,9,0,9,0]])
matrix_A

array([[3, 2, 1, 6, 7],
       [3, 6, 8, 2, 1],
       [7, 9, 0, 9, 0]])

In [129]:
# empty_like takes its shape and dtype from matrix_A but leaves the entries
# uninitialized, so the displayed values are arbitrary.
arr_empty_like = np.empty_like(matrix_A)
arr_empty_like

array([[      0,       0,       0,       0,       0],
       [      0,       0,       0,       0,       0],
       [   1524,       0,       0, 3342433,       0]])

In [130]:
arr_0s_like = np.zeros_like(matrix_A) # same way ones_like, full_like
arr_0s_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

### np.arange()

In [131]:
list(range(10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [133]:
array_rng = np.arange(30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [134]:
array_rng = np.arange(start=0,stop=30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [135]:
# Caution: a fractional step combined with an integer dtype is unreliable - the
# recorded output advances by 2 rather than by 2.5. Prefer an integer step, or
# build the float range first and cast it afterwards.
array_rng = np.arange(start = 0, stop=30, step = 2.5, dtype = np.int32)
array_rng

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22])

## Random Generators 

### Defining Random Generators

In [136]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg

In [138]:
array_RG = gen(pcg())
array_RG.normal(size=4)

array([ 0.61593609, -0.89071956,  0.06851199, -1.65294582])

In [140]:
array_RG = gen(pcg())
array_RG.normal(size=(3,4))

array([[-0.72488251, -1.4791558 ,  0.90448087, -0.62859091],
       [ 0.05088523, -0.57805532, -2.1932932 ,  1.34990673],
       [ 1.9652516 , -0.74444712, -0.09115951,  1.91563722]])

In [141]:
array_RG = gen(pcg(seed = 365))
array_RG.normal(size=(3,4))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641],
       [-0.6866818 , -0.50922173, -0.7999526 ,  0.73041825],
       [ 0.08825439, -2.1177576 ,  0.65526774, -0.48095012]])

#### Generating integers, probabilities and random choice

In [145]:
array_RG = gen(pcg(seed = 365))
array_RG.integers(low = 10 , high = 100 , size=(3,4))

array([[18, 78, 64, 78],
       [84, 66, 67, 28],
       [10, 69, 45, 15]], dtype=int64)

In [147]:
array_RG = gen(pcg(seed = 365))
array_RG.random(size=(3,4))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599],
       [0.66501486, 0.06559111, 0.71326309, 0.10812106],
       [0.87969046, 0.49405844, 0.82472673, 0.45652944]])

In [158]:
# NOTE(review): `p` holds one probability per candidate value and must sum to 1.
# These probabilities sum to 0.6, which is why this call raises ValueError.
array_RG = gen(pcg(seed = 365))
array_RG.choice([1,5,3,2,4],p = [0.1,0.1,0.1,0.1,0.2] ,size=(3,4))

ValueError: probabilities do not sum to 1

### Statistics with Numpy

In [159]:
import numpy as np

### np.mean()

In [160]:
matrix_a = np.array([[1,0,4,3,6],[3,5,4,7,1],[4,3,1,2,7]])
matrix_a

array([[1, 0, 4, 3, 6],
       [3, 5, 4, 7, 1],
       [4, 3, 1, 2, 7]])

In [161]:
np.mean(matrix_a)

3.4

In [165]:
np.mean(matrix_a[0])

2.8

In [167]:
np.mean(matrix_a[:,0])

2.6666666666666665

In [168]:
np.mean(matrix_a,axis = 0) # axis=0 averages down the rows: one mean per column

array([2.66666667, 2.66666667, 3.        , 4.        , 4.66666667])

In [169]:
np.mean(matrix_a, axis = 1) # axis=1 averages across the columns: one mean per row

array([2.8, 4. , 3.4])

In [170]:
np.mean(matrix_a,axis = 0, dtype = np.int64)

array([2, 2, 3, 4, 4], dtype=int64)

### Min and Max

In [175]:
matrix_a = np.array([[1,0,4,3,6],[3,5,4,7,1],[4,3,1,2,7]])
matrix_a

array([[1, 0, 4, 3, 6],
       [3, 5, 4, 7, 1],
       [4, 3, 1, 2, 7]])

In [176]:
np.min(matrix_a)

0

In [177]:
np.amin(matrix_a)

0

In [178]:
np.minimum(matrix_a[0], matrix_a[2])

array([1, 0, 1, 2, 6])

In [179]:
np.minimum.reduce(matrix_a)

array([1, 0, 1, 2, 1])

In [181]:
np.min(matrix_a,axis = 0) # same np.minimum.reduce(matrix_a)

array([1, 0, 1, 2, 1])

In [182]:
np.max(matrix_a)

7

### Statistical Order Function

In [183]:
import numpy as np

In [184]:
matrix_a = np.array([[1,0,4,3,6],[3,5,4,7,1],[4,3,1,2,7]])
matrix_a

array([[1, 0, 4, 3, 6],
       [3, 5, 4, 7, 1],
       [4, 3, 1, 2, 7]])

In [185]:
np.ptp(matrix_a)  # ptp = peak to peak , Returns the difference between the highest and lowest values within an array

7

In [186]:
np.ptp(matrix_a, axis = 0)

array([3, 5, 3, 5, 6])

In [187]:
np.ptp(matrix_a, axis = 1)

array([6, 6, 6])

In [190]:
np.sort(matrix_a, axis= None)

array([0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 6, 7, 7])

In [191]:
np.percentile(matrix_a, 70) # 70%

4.0

In [192]:
# `method` is the current name of this option; the older `interpolation` keyword is deprecated.
# Other choices include 'lower', 'midpoint' and 'nearest'.
np.percentile(matrix_a, 70, method='higher')

4

In [196]:
# np.quantile takes a fraction (0.70) where np.percentile takes a percentage (70).
# `method` is the current name of this option; the older `interpolation` keyword is deprecated.
np.quantile(matrix_a, 0.70, method='nearest')

4