# Module : NUMPY

## 1. Intro - imports

In [239]:
# import the package

import numpy as np

In [240]:
# check the version

np.__version__

'1.20.3'

In [241]:
# Obtaining inline help using the TAB completion
# Uncomment the following line, delete the word tab, and hit TAB after . 

#np.<TAB>

Object `np.<TAB>` not found.


In [242]:
# obtaining inline help using a wildcard - search for methods, attributes that contain 'con'

np.*con*?

## 2 Creating arrays

### 2.1 np.array()  function

In [243]:
#create an array from a Python 1D list

mylist = [10,20,30,40]
arr1 = np.array(mylist)
arr1

array([10, 20, 30, 40])

In [244]:
#create an array from a Python 2D list

mat1 = [[1,2,3],[4,5,6],[7,8,9],[10,11,12]]
arr2 = np.array(mat1)
arr2

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [245]:
# what type of object is arr2?
# It is an instance of the class called 'ndarray' in the numpy module
type(arr2)

numpy.ndarray

In [246]:
# create an array from a 3D list
a_3d_array = np.array([[[1,2],
                        [3,4]],
                       [[5,6],
                        [7,8]]])

In [247]:
#what is the shape of the array?
a_3d_array.shape

(2, 2, 2)

In [248]:
a_3d_array

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

### 2.2 Specifying the datatype of the array contents - dtype

We can specify the data type of the array contents using the parameter dtype\
It can be specified in a few different ways

In [249]:
arr1 = np.array([10,20,30,40], dtype=float)
arr1

array([10., 20., 30., 40.])

In [250]:
arr1 = np.array([10,20,30,40], dtype=np.float32)
arr1

array([10., 20., 30., 40.], dtype=float32)

In [251]:
arr2 = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype='f4')
arr2

array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]], dtype=float32)

In [252]:
# astype() returns a new, converted array; arr2 itself keeps its float32 dtype.
# np.int16 and 'i2' are two spellings of the same dtype, so both calls give the
# same result - only the value of the last expression in the cell is displayed.
arr2.astype(np.int16)
arr2.astype('i2')


array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]], dtype=int16)

### 2.3 Some ndarray attributes

In [253]:
arr1.dtype

dtype('float32')

In [254]:
arr1.ndim

1

In [255]:
arr1.size

4

In [256]:
arr1.shape

(4,)

In [370]:
arr2.shape

(3, 3)

In [373]:
# to get the number of rows
print(arr2.shape[0])
# to get the number of columns
print(arr2.shape[1])

3
3


### 2.4 Re-shaping arrays: reshape()

In [258]:
# arr2 is a (3,3) array
print(arr2)

[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]


In [259]:
# same as arr2.reshape(1,9)- # re-shape into 1 row and as many columns as needed
arr2.reshape(1,-1)    

array([[1., 2., 3., 4., 5., 6., 7., 8., 9.]], dtype=float32)

In [260]:
# re-shape into 3 rows and as many columns as needed
arr2.reshape(3, -1)    

array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]], dtype=float32)

## 3. Some special ndarrays

### 3.1 zeros(), ones()

one required positional parameter is the ***shape*** of the array to create as an int or tuple\
default ***dtype*** is float


In [261]:
# create a 1D array of 0s
np.zeros(3)

array([0., 0., 0.])

In [262]:
# create a 2D array of zeros
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [263]:
# create a 2D array of 1's
np.ones((2,2))

array([[1., 1.],
       [1., 1.]])

### 3.2 full(), fill_diagonal()

**np.full()** creates an array in which every element is set to the same initial value\
required parameters are ***shape*** of the array and the ***fill_value***

In [264]:
# fill a 2x2 array with negative infinity
# (-np.inf is used because the np.NINF alias was removed in NumPy 2.0)
np.full((2,2), fill_value = -np.inf)

array([[-inf, -inf],
       [-inf, -inf]])

In [265]:
arr = np.full((3,3), fill_value = -1.0)
arr

array([[-1., -1., -1.],
       [-1., -1., -1.],
       [-1., -1., -1.]])

**np.fill_diagonal()** writes the specified value onto the main diagonal of the given array, modifying that array in place\
required parameters are the ***array*** to fill and the value ***val***


In [266]:
np.fill_diagonal(arr, 0)
arr

array([[ 0., -1., -1.],
       [-1.,  0., -1.],
       [-1., -1.,  0.]])

### 3.3 eye()

Creates an identity matrix - one required parameter is ***N*** or size of square matrix


In [267]:
#create a (4,4) identity matrix
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

### 3.4 arange()
produces an array of evenly spaced values within a given range \
arange, like Python range, accepts a ***start***, ***stop*** (only required) and ***stride/step*** parameter \
stop value is not inclusive


In [268]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [269]:
np.arange(0,10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [270]:
np.arange(0,11,2)

array([ 0,  2,  4,  6,  8, 10])

In [271]:
np.arange(0,1,0.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

### 3.5 linspace(), np.logspace()

**np.linspace()** produces a specified/fixed number of elements between two inclusive values evenly spaced on a linear scale\
**np.logspace()** produces a specified/fixed number of elements evenly spaced on a log scale; the endpoints are given as exponents, so the values run from 10^start to 10^stop inclusive


In [272]:
# produce an array of 3 evenly-spaced values between 0 and 5 inclusive.
np.linspace(0,5,3)

array([0. , 2.5, 5. ])

In [273]:
np.linspace(0,1, 10).reshape(5,-1)

array([[0.        , 0.11111111],
       [0.22222222, 0.33333333],
       [0.44444444, 0.55555556],
       [0.66666667, 0.77777778],
       [0.88888889, 1.        ]])

In [274]:
# produce an array of 10 values evenly spaced on a log scale
# between 10^start and 10^stop (here 10^0 = 1 and 10^1 = 10)
np.logspace(0, 1, 10)

array([ 1.        ,  1.29154967,  1.66810054,  2.15443469,  2.7825594 ,
        3.59381366,  4.64158883,  5.9948425 ,  7.74263683, 10.        ])

### 3.6 Random arrays using np.random

**np.random** module contains many functions that allows us to work with pseudo-random numbers

In [275]:
import random    # standard-library generator; used by the pure-Python timing cell in section 6

# Set the seed for repeatability - seed is any positive int of your choosing

np.random.seed(1234)      # global - sets the seed for all NumPy random operations

r = np.random.RandomState(1234)     # non-global - will only use the seed for
                                    # this specific object r

#### np.random.rand()
This picks values from the continuous uniform distribution [0,1)\
the arg here is either a single int or a tuple

In [276]:
np.random.seed(1234)
print(np.random.rand(3))
print(np.random.rand(3))
print()
np.random.seed(1234)
print(np.random.rand(3))
print(np.random.rand(3))
print()

# the above two sequences are the same as they use the same seed
# the following will be different as I change the seed
np.random.seed(12345)
print(np.random.random(3))
print(np.random.random(3))
print()

[0.19151945 0.62210877 0.43772774]
[0.78535858 0.77997581 0.27259261]

[0.19151945 0.62210877 0.43772774]
[0.78535858 0.77997581 0.27259261]

[0.92961609 0.31637555 0.18391881]
[0.20456028 0.56772503 0.5955447 ]



In [277]:
# To pull values from continuous uniform distribution - Uniform(5,8)
(8-5)* np.random.random((2,4)) + 5

array([[7.89354356, 6.95953129, 7.24671991, 6.96070961],
       [7.24314443, 7.88392021, 5.02516489, 5.31933313]])

#### np.random.randn()
pick random numbers from standard normal distribution (mean = 0, s.d. =1)

In [278]:
# Return a sample (or samples) from the "standard normal" distribution. 
np.random.randn(3)

array([ 1.00718936, -1.29622111,  0.27499163])

In [279]:
np.random.randn(3,3)

array([[ 0.22891288,  1.35291684,  0.88642934],
       [-2.00163731, -0.37184254,  1.66902531],
       [-0.43856974, -0.53974145,  0.47698501]])

#### np.random.normal()
pick random numbers from normal distribution of specified mean and s.d.

In [280]:
np.random.normal(10, 2, (3,3))

array([[16.49788784,  7.95754495,  8.84582539],
       [10.24824255, 10.60522712, 11.04754414],
       [10.00188056, 12.68761959,  8.57291203]])

#### np.random.randint()
pick random integers from discrete uniform distribution of specified interval \
Return random integers  from (start, end, shape)

In [281]:
# if only one arg is given, it is assumed to be the end value.
np.random.randint(10)

1

In [282]:
rand1 = np.random.randint(1,100,10)
rand1

array([64,  1, 91, 45, 58, 15, 27, 44, 31, 33])

In [283]:
rand2 = np.random.randint(1,100,(3,3))
rand2

array([[ 4, 65,  7],
       [67, 50, 38],
       [73, 44, 63]])

## 4. Array indexing

### 4.1 Indexing
index starts counting from 0\
can also use negative indexing, where the last index is -1 and count backwards

In [284]:
arr1 = np.arange(6).reshape(2,3)
arr1

array([[0, 1, 2],
       [3, 4, 5]])

In [285]:
# for 2D arrays, need 2 indices to pick an element
# first index represents rows (down), second index represents columns (across)
arr1[0,2]

2

In [286]:
# accessing an entire row - two ways to write, the comma and colon for cols is optional
arr1[0]     
# or
arr1[0,:]

array([0, 1, 2])

In [287]:
# access the last row
arr1[-1]

array([3, 4, 5])

In [288]:
# when accessing an entire column, colon and comma is required for row, not optional
arr1[:, 1]   

array([1, 4])

#### Changing  values of arrays in-place

assignment using = changes the values in an array in-place (i.e. doesn't make a copy of the array)

In [289]:
arr2[0, 1] = 10
arr2

array([[ 1., 10.,  3.],
       [ 4.,  5.,  6.],
       [ 7.,  8.,  9.]], dtype=float32)

In [290]:
arr2[1] = [9,10,11]
arr2

array([[ 1., 10.,  3.],
       [ 9., 10., 11.],
       [ 7.,  8.,  9.]], dtype=float32)

In [291]:
arr2[:, 1] = [15,16,17]
arr2

array([[ 1., 15.,  3.],
       [ 9., 16., 11.],
       [ 7., 17.,  9.]], dtype=float32)

### 4.2 Slicing
we can obtain non-contiguous or contiguous sub-set of values \
default step size is 1

In [292]:
arr2

array([[ 1., 15.,  3.],
       [ 9., 16., 11.],
       [ 7., 17.,  9.]], dtype=float32)

In [293]:
# get row 0 and columns 0-1
arr2[:1,:2]

array([[ 1., 15.]], dtype=float32)

In [294]:
# get  rows 0-1 and all columns 0-2
arr2[0:2, :]

array([[ 1., 15.,  3.],
       [ 9., 16., 11.]], dtype=float32)

In [295]:
# get all rows, and columns starting at 0, and getting every 2nd column, 
# but not including or going over col index 3
arr2[:, 0:3:2]

array([[ 1.,  3.],
       [ 9., 11.],
       [ 7.,  9.]], dtype=float32)

### 4.3 Fancy indexing
we can choose the indices we want by providing  a list for each dimension

In [296]:
# get all the columns for rows 0 and 2
arr2[[0,2], :]

array([[ 1., 15.,  3.],
       [ 7., 17.,  9.]], dtype=float32)

fancy indexing on multiple dimensions \
if used, each list of indices for each dimension has to have the same length, n \
[row1,row2...,rown] [col1, col2, ..coln] \
and the list size tells us how many elements will be obtained. In the below code, n = 2 \
the coords are then read as a tuple (pair) to pick elements: (row1, col1), (row2, col2), and so on...



In [297]:
arr2[[0,2], [2,1]]

array([ 3., 17.], dtype=float32)

### 4.4 Boolean indexing
allows us to select values from an array that satisfy a condition. This returns a flattened (single-dimension) array.

In [298]:
arr2

array([[ 1., 15.,  3.],
       [ 9., 16., 11.],
       [ 7., 17.,  9.]], dtype=float32)

In [299]:
# boolean indexing with a mask of the same shape returns the selected values as a 1D array
# (a new array holding copies of those values, not a view of arr2)
mask = arr2 > 10
print(mask)
arr2[mask]

[[False  True False]
 [False  True  True]
 [False  True False]]


array([15., 16., 11., 17.], dtype=float32)

In [300]:
mask = (arr2 > 10) & (arr2 % 2 !=0)
mask

array([[False,  True, False],
       [False, False,  True],
       [False,  True, False]])

In [301]:
arr2[mask]

array([15., 11., 17.], dtype=float32)

## 4.5 Applications of indexing

### np.newaxis
add a new axis (axes) to the data \
for e.g. can change from 1D to 2D or higher


In [349]:
x = np.array([0,1,2])
print(x , end = "\n\n")

print(x[: , np.newaxis], end = "\n\n")

print(x[np.newaxis , :])

[0 1 2]

[[0]
 [1]
 [2]]

[[0 1 2]]


In [355]:
# let's check the shapes of the arrays created above.

x = np.array([0,1,2])
print(x.shape)

x1 = x[: , np.newaxis]
print(x1.shape)

x2 = x[np.newaxis , :]
print(x2.shape)

(3,)
(3, 1)
(1, 3)


### Join or concatenate arrays using np.concatenate(), np.c_

In [374]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

np.concatenate((arr1, arr2))


array([1, 2, 3, 4, 5, 6])

In [358]:
# to join as two rows, we first need to create a new row dimension for each 1D array
# and then concatenate rows by using axis = 0 (along rows)

np.concatenate((arr1[np.newaxis, :], arr2[np.newaxis, :]), axis = 0)

array([[1, 2, 3],
       [4, 5, 6]])

In [359]:
# to join as two columns, we first need to create a new col dimension for each 1D array
# and then concatenate the columns by using axis = 1 (along columns)
# useful for adding features to your data

np.concatenate((arr1[:, np.newaxis], arr2[:, np.newaxis]), axis = 1)

array([[1, 4],
       [2, 5],
       [3, 6]])

In [378]:
# Easier command to join 1-D arrays as columns.
# np.concatenate remains the general-purpose tool: it joins along any existing axis.

# notice the [] instead of ()
# np.c_[] first converts each 1-D array into a column array and then concatenates the columns

np.c_[arr1, arr2]

array([[1, 4],
       [2, 5],
       [3, 6]])

## 5. Array Operations

### 5.1 Mathematical operations

#### Unary ufuncs

These are called as **np.functionname(arrayname, optional args)**


In [363]:
# float vector 0..5 and its 2x3 reshaped form
arr = np.arange(0, 6, dtype=np.float64)
arr1 = np.reshape(arr, (2, 3))
arr1

array([[0., 1., 2.],
       [3., 4., 5.]])

In [364]:
np.sqrt(arr1)

array([[0.        , 1.        , 1.41421356],
       [1.73205081, 2.        , 2.23606798]])

In [366]:
arr2 = np.log(arr1+.1)
arr2

array([[-2.30258509,  0.09531018,  0.74193734],
       [ 1.13140211,  1.41098697,  1.62924054]])

In [367]:
np.round(arr2, 3)

array([[-2.303,  0.095,  0.742],
       [ 1.131,  1.411,  1.629]])

#### Binary ufuncs
arithmetic functions are called as **np.functionname(x, y)**\
where x and y can be arrays or scalars.\
np.add(x, y), np.subtract(x, y), np.multiply(x, y), np.divide(x, y), np.mod(x, y), np.power(x, y) etc

In [311]:
# 2x2 float64 matrix holding 0..3 ('f8' is the type code for an 8-byte float)
arrA = np.arange(4, dtype=np.float64).reshape(2, 2)
arrA

array([[0., 1.],
       [2., 3.]])

In [312]:
arrB = np.random.randn(2,2)
arrB

array([[-0.83115354, -0.33692207],
       [-0.00468022, -0.76023409]])

In [313]:
# two ways to call the operator
print(arrA + arrB)       
np.add(arrA, arrB)

[[-0.83115354  0.66307793]
 [ 1.99531978  2.23976591]]


array([[-0.83115354,  0.66307793],
       [ 1.99531978,  2.23976591]])

In [314]:
print(arrA * arrB)       
np.multiply(arrA, arrB)

[[-0.         -0.33692207]
 [-0.00936045 -2.28070228]]


array([[-0.        , -0.33692207],
       [-0.00936045, -2.28070228]])

### 5.2 Logical operations - binary ufuncs

logical functions are called using **np.functionname(x, y)**\
where x and y are arrays or scalars\
np.greater(), np.greater_equal(), np.less(), np.less_equal(), np.equal(), np.not_equal()\
or x > y, x >= y, x < y, x <= y, x == y, x!= y


In [315]:
print(arrA > arrB)
np.greater(arrA, arrB)

[[ True  True]
 [ True  True]]


array([[ True,  True],
       [ True,  True]])

### 5.3 Matrix operations

In [316]:
# generate a random array
arrM = np.random.randn(2, 3)
arrM

array([[-1.24611253,  0.27884551, -0.47320038],
       [-1.74212532, -0.96808228, -0.70962448]])

In [317]:
# matrix transpose
arrM.T

array([[-1.24611253, -1.74212532],
       [ 0.27884551, -0.96808228],
       [-0.47320038, -0.70962448]])

In [318]:
# this is the multiplication ufunc - it multiplies elementwise (not a matrix product)
arrM * arrM

array([[1.55279643, 0.07775482, 0.2239186 ],
       [3.03500065, 0.93718331, 0.50356691]])

Matrix multiplication can be done in a few different ways, using **arrayname.dot()** \
or using **np.matmul()** which is also written using **@ infix operator**

*Rule of matrix multiplication:* \
multiplying an (a, b) with (b,c) gives (a,c) matrix \
Note that the number of columns of the first array = the number of rows of the second array\
cannot multiply if the cols of the first != rows of the second.

In [381]:
# (2x3) matrix multiplied by (3x2) matrix produces a (2x2) matrix

np.matmul(arrM, arrM.T)

array([[1.85446985, 2.23673337],
       [2.23673337, 4.47575086]])

In [320]:
arrM @ arrM.T

array([[1.85446985, 2.23673337],
       [2.23673337, 4.47575086]])

In [321]:
np.dot(arrM, arrM.T)

array([[1.85446985, 2.23673337],
       [2.23673337, 4.47575086]])

In [323]:
# multiplying (3x2) matrix by (2x3) matrix produces (3x3) matrix
np.matmul(arrM.T, arrM)

array([[4.58779707, 1.33904778, 1.82591571],
       [1.33904778, 1.01493813, 0.55502509],
       [1.82591571, 0.55502509, 0.72748551]])

In [324]:
# cannot perform the matrix multiplication below since the inner dimensions have to be equal
# uncomment to see the error

# np.matmul(arrM, arrM)

**numpy.linalg** sub-module contains many built-in functions for linear algebra algorithms.

### 5.4 Statistical operations

#### Reduction operations - reduce to a single value(s)
These are functions that can be called as a NumPy function - **np.functionname(x)**\
or as a method of the ndarray object - i.e. **arrayname.functionname()**\
They have an optional  parameter called ***axis*** to specify which axis to perform the operation on\
If no axis is specified reduction operations reduce over all values

In [384]:
# Drawing from a normal distribution of specified mean and sigma

myarr = np.random.normal(1, 1.5, (3,4))
myarr

array([[ 0.14789185,  1.05446143,  3.2782818 ,  1.74530181],
       [ 2.9183085 , -2.30742812, -0.35055017,  1.59896043],
       [ 2.15320122,  3.33297402,  1.0244316 ,  0.63260017]])

In [385]:
np.sum(myarr)

15.228434545134222

In [386]:
# get the sum along axis 1 , which means perform operations within each row
np.sum(myarr, axis =1)

array([6.2259369 , 1.85929064, 7.14320701])

In [328]:
np.prod(myarr)

-35.67933255721179

In [395]:
np.mean(myarr)

1.2690362120945184

In [387]:
np.mean(myarr, axis=1)

array([1.55648422, 0.46482266, 1.78580175])

In [388]:
# axis = 0 means perform operations within each column
np.max(myarr, axis =0)

array([2.9183085 , 3.33297402, 3.2782818 , 1.74530181])

In [389]:
np.min(myarr, axis =1)

array([ 0.14789185, -2.30742812,  0.63260017])

In [390]:
# the index of the minimum value in the array if it were flattened
myarr.argmin()

5

In [391]:
# the index of the maximum value in the array if it were flattened
myarr.argmax()

9

In [396]:
# provides the value at the specified percentile of myarr (all values pooled).
# q is expressed on a 0-100 scale, so the 30th percentile is q=30
# (q=.30 would ask for the 0.3th percentile). Rounded to 3 decimals for display.
np.round(np.percentile(myarr, q=30), 3)

0.75

#### Accumulation operations - store all intermediate values

In [336]:
myarr = np.arange(10).reshape(5, -1)
print(myarr)
myarr.cumsum(axis=0)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


array([[ 0,  1],
       [ 2,  4],
       [ 6,  9],
       [12, 16],
       [20, 25]], dtype=int32)

In [337]:
myarr.cumprod(axis=1)

array([[ 0,  0],
       [ 2,  6],
       [ 4, 20],
       [ 6, 42],
       [ 8, 72]], dtype=int32)

## 5.5 Other useful NumPy methods

### np.all()

In [338]:
# checks whether all values are equal to True (non-zero)
np.all(myarr)

False

### np.unique()

In [339]:
# an array of the unique values found
np.unique(np.array([[1,2,3], [3,4,5], [1,3,6]]))

array([1, 2, 3, 4, 5, 6])

### np.bincount()

In [340]:
# count how many occurrences there are of integers 0,1,2,......highest number in the array.
np.bincount(np.array([1,2,3,3,4,5,1,3,6]))

array([0, 2, 1, 3, 1, 1, 1], dtype=int64)

### np.where()

In [341]:
# np.where(cond, a, b) is a vectorised if-then-else:
# take a wherever cond is True and b wherever it is False

anarr = np.arange(1, 7).reshape(3, 2)
print(anarr)
np.where(anarr > anarr.mean(), 1, anarr)

[[1 2]
 [3 4]
 [5 6]]


array([[1, 2],
       [3, 1],
       [1, 1]])

### ndarray.flatten(), np.ravel()

flatten always returns a copy. slower.\
ravel returns a view of the original array whenever possible.

In [342]:
# flatten() is an ndarray method (NumPy has no np.flatten function) - creates a 1D copy

anarr.flatten()

array([1, 2, 3, 4, 5, 6])

In [343]:
# np.ravel() - creates a 1D array

anarr.ravel()

array([1, 2, 3, 4, 5, 6])

### np.sort(), np.argsort()
both of these take an argument called axis\
if axis = 0, sort down the rows (within each column)\
if axis = 1, sort across the columns (within each row)\
default value for axis = -1, which means sort along the last axis (for a 2D array: within each row)

In [402]:
anarr2 = np.random.randint(1, 50, (3,3))
print(anarr2)

# axis=0 sorts within each column (down the rows); sort() reorders anarr2 in place
anarr2.sort(axis=0)
anarr2

[[42 28 19]
 [16 14 32]
 [28 30 23]]


array([[16, 14, 19],
       [28, 28, 23],
       [42, 30, 32]])

In [398]:
anarr2 = np.random.randint(1, 50, (3,3))
print(anarr2)
anarr2.argsort()

[[40 31  6]
 [36  1 32]
 [39 13  4]]


array([[2, 1, 0],
       [1, 2, 0],
       [2, 1, 0]], dtype=int64)

## 6. Timing Operations

### Why NumPy? Say you want to multiply two large lists/arrays of ints 
Let's look at the speed differences between using standard (pure) Python and NumPy

In [347]:
# Here we will use pure Python to perform the computation
# so let us import the Python module - random

list1 = [random.randint(1,100) for _ in range(10000)] 
list2 = [random.randint(1,100) for var in range(10000)]  

#%timeit list1times2 = [x * y for x,y in zip(list1, list2)]
%timeit list1times2 = [x * y for x in list1 for y in list2]

9.47 s ± 965 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [348]:
# Here we will perform the computation using NumPy
# so we will use the np.random module's pseudo-random functions
# NOTE: despite their names, list1 and list2 are rebound to NumPy arrays here

list1 = np.random.randint(1, 100, 100000)
list2 = np.random.randint(1, 100, 100000)

# * on ndarrays multiplies element by element
%timeit list1times2 = list1 * list2

107 µs ± 4.07 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


# The end