# Lab 1: Python, NumPy and vectorisation

NumPy docs: https://numpy.org/doc/stable/user/index.html

Numpy arrays:

- 1D array of shape `(n,)` has elements indexed from [0] to [n-1] --> vectors
- 2D array of shape `(m,n)` has elements in rows indexed from [0] to [m-1] and elements in columns indexed from [0] to [n-1] --> matrices


In [1]:
import numpy as np
import time 

### Creating vectors and matrices

In [13]:
a = np.zeros(4)
b = np.ones((4,))
c = np.zeros((3,4))
d = np.eye(3)
print(f"a = {a}, a shape = {a.shape}, a data type = {a.dtype}")
print(f"b = {b}, b shape = {b.shape}, b data type = {b.dtype}")
print(f"c = {c}, c shape = {c.shape}, c data type = {c.dtype}")
print(f"d = {d}, d shape = {d.shape}, d data type = {d.dtype}")

a = [0. 0. 0. 0.], a shape = (4,), a data type = float64
b = [1. 1. 1. 1.], b shape = (4,), b data type = float64
c = [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]], c shape = (3, 4), c data type = float64
d = [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]], d shape = (3, 3), d data type = float64


In [21]:
a1 = np.random.random(5) #takes as argument only the size of the array
a2 = np.random.random((3,4))
print(a1)
print(a2)

[0.90264958 0.82968079 0.70225588 0.95543027 0.08306786]
[[0.73704379 0.8761941  0.81664683 0.68922314]
 [0.2914514  0.71552981 0.42943151 0.89906041]
 [0.22525213 0.02963834 0.82904587 0.08647421]]


In [25]:
# Random integer arrays: a 1D vector and a 2D matrix.
b1 = np.random.randint(0,20,5) #takes as arguments the lower bound (inclusive), upper bound (exclusive) and size of the array
b2 = np.random.randint(0,20,(3,4))   #the size can also be a shape tuple, here 3 rows by 4 columns
print(b1)
print(b2)

[11  9 16 11 11]
[[ 3  3 19  6]
 [ 7 16 10  2]
 [ 9  4  1 17]]


In [30]:
c1 = np.arange(10)
c2 = np.arange(10,20,2)
print(c1)
print(c2)

[0 1 2 3 4 5 6 7 8 9]
[10 12 14 16 18]


In [31]:
print(np.random.rand(5)) # Random numbers from a uniform distribution over [0, 1)

[0.98543929 0.33632598 0.37343985 0.11593648 0.52161865]


In [34]:
myvector = np.array([2,4,6,8,10])
print(myvector)
print(myvector.shape)
mymatrix = np.array([[2,4,6], [9,6,2], [4,4,5], [9,2,7]])
print(mymatrix)
print(mymatrix.shape)

[ 2  4  6  8 10]
(5,)
[[2 4 6]
 [9 6 2]
 [4 4 5]
 [9 2 7]]
(4, 3)


### Operations on vectors

#### 1. Indexing

Indexing means referring to an element of an array by its position within the array. NumPy starts indexing at 0.

In [None]:
vec1 = np.arange(10)
print(vec1)
print(f"3rd element of vec1 = {vec1[2]}")
print(f"last element of vec1 = {vec1[-1]}") #negative indexes count from the end of the array

[0 1 2 3 4 5 6 7 8 9]
3rd element of vec1 = 2
last element of vec1 = 9


#### 2. Slicing

Slicing means getting a subset of elements from an array based on their indices.

Slicing creates an array of indices using a set of three values `(start:stop:step)`

In [46]:
print(f"vec1 = {vec1}")
print("vec1[2:7:1] =", vec1[2:7:1]) #access 5 consecutive elements, start at index 2, stop before index 7, step by 1
print("vec1[2:7:2] =", vec1[2:7:2]) #start at index 2, stop before index 7, step by 2
print("vec1[3:] =", vec1[3:]) #access elements starting from index 3 to the end of the array
print("vec1[:3] =", vec1[:3]) #access elements starting from the beginning of the array to index 3 (not included)
print("vec1[::2] =", vec1[::2]) #access every second element of the array
print("vec1[::-1] =", vec1[::-1]) #access the array in reverse order, i.e, reverse the array
print("vec1[7:2:-1] =", vec1[7:2:-1]) #access elements starting from index 7 to index 2 (not included) in reverse order
print("vec1[:] =", vec1[:]) #access all elements of the array

vec1 = [0 1 2 3 4 5 6 7 8 9]
vec1[2:7:1] = [2 3 4 5 6]
vec1[2:7:2] = [2 4 6]
vec1[3:] = [3 4 5 6 7 8 9]
vec1[:3] = [0 1 2]
vec1[::2] = [0 2 4 6 8]
vec1[::-1] = [9 8 7 6 5 4 3 2 1 0]
vec1[7:2:-1] = [7 6 5 4 3]
vec1[:] = [0 1 2 3 4 5 6 7 8 9]


#### 3. Single vector operations

In [None]:
# Operations on a single vector: scalar arithmetic is applied to every element,
# while sum/mean/std/var reduce the whole vector to one number.
print(f"vec1 = {vec1}")
vec2 = -vec1
print(f"vec2 = {vec2}") #negate all elements of the array
vec3 = vec1 + 5
print(f"vec3 = {vec3}") #add 5 to all elements of the array
vec4 = vec1 * 2
print(f"vec4 = {vec4}") #multiply all elements of the array by 2, i.e, scaling vector by a scalar
vec5 = vec1 / 2
print(f"vec5 = {vec5}") #divide all elements of the array by 2 (true division, so the result is a float array)
vec6 = np.sum(vec1)
print(f"vec6 = {vec6}") #sum all elements of the array
vec7 = np.mean(vec1)
print(f"vec7 = {vec7}") #mean of all elements of the array
vec8 = np.std(vec1)
print(f"vec8 = {vec8}") #standard deviation of all elements of the array (population form, i.e. the square root of np.var)
vec9 = np.var(vec1)
print(f"vec9 = {vec9}") #variance of all elements of the array (population form: mean squared deviation from the mean)
vec10 = vec1**2
print(f"vec10 = {vec10}") #square all elements of the array

vec1 = [0 1 2 3 4 5 6 7 8 9]
vec2 = [ 0 -1 -2 -3 -4 -5 -6 -7 -8 -9]
vec3 = [ 5  6  7  8  9 10 11 12 13 14]
vec4 = [ 0  2  4  6  8 10 12 14 16 18]
vec5 = [0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5]
vec6 = 45
vec7 = 4.5
vec8 = 2.8722813232690143
vec9 = 8.25
vec10 = [ 0  1  4  9 16 25 36 49 64 81]


#### 4. Vector-vector element wise operations

Most of the NumPy arithmetic, logical and comparison operations apply to vectors as well. These operators work on an element-by-element basis. 

For example: $c_i = a_i + b_i$


In [None]:
# Element-wise arithmetic between two random integer vectors of the same length.
# NOTE: randint(0, 20, ...) can draw 0; dividing or taking the remainder by a zero
# element makes NumPy emit a RuntimeWarning instead of raising an exception.
vec1 = np.random.randint(0,20,5)
print(f"vec1 = {vec1}")
vec2 = np.random.randint(0,20,5)
print(f"vec2 = {vec2}")
print(f"vec1 + vec2 = {vec1 + vec2}") #add two vectors element-wise
print(f"vec1 - vec2 = {vec1 - vec2}") #subtract two vectors element-wise
print(f"vec1 * vec2 = {vec1 * vec2}") #multiply two vectors element-wise
print(f"vec1 / vec2 = {vec1 / vec2}") #divide two vectors element-wise
# NOTE: integer arrays have a fixed width, so large powers silently wrap around
# instead of raising (in the recorded output, 8 ** 12 is printed as 0).
# Convert to float first (e.g. vec1.astype(float)) if the true magnitude matters.
print(f"vec1 ** vec2 = {vec1 ** vec2}") #raise each element of vec1 to the power of the corresponding element in vec2
print(f"vec2 % vec1 = {vec2 % vec1}") #modulus, i.e, remainder




vec1 = [ 8  7 14 10 17]
vec2 = [12  6 18 17  5]
vec1 + vec2 = [20 13 32 27 22]
vec1 - vec2 = [-4  1 -4 -7 12]
vec1 * vec2 = [ 96  42 252 170  85]
vec1 / vec2 = [0.66666667 1.16666667 0.77777778 0.58823529 3.4       ]
vec1 ** vec2 = [         0     117649  381943808 1569325056    1419857]
vec2 % vec1 = [4 6 4 7 5]


Dot product of two vectors: 

The dot product multiplies the values in two vectors element-wise and then sums the result. Vector dot product requires the dimensions of the two vectors to be the same.

In [57]:
#Dot product of two vectors
dp = np.dot(vec1, vec2)
print(f"Dot product of vec1 and vec2 = {dp}") #dot product of two vectors

Dot product of vec1 and vec2 = 645


Manual implementation of the dot product using a `for` loop instead of NumPy:

In [None]:
a = np.arange(5,9)
print(f"a = {a}")
b = np.arange(2,6)
print(f"b = {b}")

def dotprod(a, b):
    """
    Compute the dot product of vectors a and b with an explicit Python loop.

    Arguments:
    a - ndarray (n,) (any sequence of numbers with a length works)
    b - ndarray (n,) (must have the same length as a)

    Returns the dot product (scalar): the sum of a[i] * b[i] over all i.
    For empty vectors the result is 0.

    Raises ValueError if a and b do not have the same length.
    """
    if len(a) != len(b):
        raise ValueError("Vectors must be of the same length")
    # Named `total` rather than `sum` so the builtin sum() is not shadowed.
    total = 0
    # zip walks both vectors in lockstep; lengths were checked above, so no
    # element is silently dropped.
    for a_i, b_i in zip(a, b):
        total += a_i * b_i
    return total

print(f"dp manual = {dotprod(a,b)}")
print(f"dp numpy = {np.dot(a,b)}") 

a = [5 6 7 8]
b = [2 3 4 5]
dp manual = 96
dp numpy = 96


NumPy's vectorized methods are much faster than manual looping, which is the benefit of vectorization. This is because NumPy makes better use of the data parallelism available in the underlying hardware. GPUs and modern CPUs implement Single Instruction, Multiple Data (SIMD) pipelines, allowing multiple operations to be issued in parallel. This is critical in Machine Learning, where the data sets are often very large.

In [64]:
# Benchmark: time NumPy's vectorized np.dot against the Python-loop dotprod()
# on the same pair of large random vectors.
np.random.seed(1)  # fixed seed so both runs see the same reproducible data
a = np.random.rand(10000000)  # very large arrays
b = np.random.rand(10000000)

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() follows the wall clock, which can be coarse and
# can jump if the system time is adjusted.
tic = time.perf_counter()  # capture start time
c = np.dot(a, b)
toc = time.perf_counter()  # capture end time

print(f"np.dot(a, b) =  {c:.4f}")
print(f"Vectorized version duration: {1000*(toc-tic):.4f} ms ")

tic = time.perf_counter()  # capture start time
c = dotprod(a,b)
toc = time.perf_counter()  # capture end time

print(f"dotprod(a, b) =  {c:.4f}")
print(f"loop version duration: {1000*(toc-tic):.4f} ms ")

del a, b  #remove these big arrays from memory

np.dot(a, b) =  2501072.5817
Vectorized version duration: 11.6107 ms 
dotprod(a, b) =  2501072.5817
loop version duration: 4582.6597 ms 


### Matrices

2D matrices will be used to hold training data, which is **m** examples by **n** features, creating an **(m,n)** array. 

Course 1 will not perform operations directly on matrices; it typically extracts an example as a vector and operates on that.

<figure>
    <center> <img src="../images/C1_W2_Lab04_Matrices.PNG"  alt='missing'  width=900></center>
    <figcaption> Generic Matrix Notation, 1st index is row, 2nd is column </figcaption>
</figure>

- Going forward, our examples will be stored in a 2D array, `X_train` of dimension (m,n).
- `w` will be a 1D vector of shape (n,).
- We will perform operations by looping through the examples, extracting each example to work on individually by indexing X. For example: `X[i]`
- `X[i]` returns a value of shape (n,), a 1-dimensional vector. Consequently, operations involving `X[i]` are often vector-vector.  

In [None]:
# show common Course 1 example
X = np.array([[1],[2],[3],[4]])
print(f"X = {X}")
print(f"X has shape {X.shape}")
print(f"X[1] = {X[1]}")
print(f"X[1] has shape {X[1].shape}")
w = np.array([2])
print(f"w = {w}")
print(f"w has shape {w.shape}")
c = np.dot(X[1], w)
print(f"c = {c}")
print(f"c has shape {c.shape}")

X = [[1]
 [2]
 [3]
 [4]]
X has shape (4, 1)
X[1] = [2]
X[1] has shape (1,)
w = [2]
w has shape (1,)
c = 4
c has shape ()


### Operations on matrices

#### 1. Indexing

Matrices include a second index. The two indexes describe [row, column]. Access can either return an element or a row/column.

In [73]:
mat1 = np.arange(6).reshape(-1,2)
print(f"mat1.shape = {mat1.shape}")

mat1.shape = (3, 2)


1. `np.arange(6)`:
- The `arange()` function from the NumPy library generates a sequence of equally spaced values. The sequence starts from 0 (by default) up to, but not including, 6.
- It creates a 1-dimensional NumPy array, which would be [0, 1, 2, 3, 4, 5]
2. `.reshape(-1, 2)`:

The `reshape()` method changes the array's shape (dimensions) without altering the data.

The arguments to `reshape()` specify the new dimensions of the array. In this case, (-1, 2):
- 2: This explicitly sets the number of columns in the new array to 2.
- -1: This placeholder tells NumPy to automatically calculate the number of rows needed to accommodate all the elements from the original array, given that the number of columns is fixed at 2

3. The original array [0, 1, 2, 3, 4, 5] has 6 elements. Reshaping it into a matrix with 2 columns means that 3 rows are needed (3 rows * 2 columns = 6 elements)


In [74]:
print(f"mat1 = {mat1}")

mat1 = [[0 1]
 [2 3]
 [4 5]]


In [76]:
#access a single element with [row, column]; both indexes start at 0
print(f"mat1[0,0] = {mat1[0,0]}")  #access first row, first column
print(f"mat1[2,1] = {mat1[2,1]}") #access third row, second column

mat1[0,0] = 0
mat1[2,1] = 5


In [77]:
#accessing a matrix by specifying a row returns a 1D vector
print(f"mat1[1] = {mat1[1]}")  #access second row, returns a 1D vector

mat1[1] = [2 3]


#### 2. Slicing

Slicing creates an array of indices using a set of three values `(start:stop:step)`. A subset of values is also valid.

In [88]:
# Slicing a 2D array: the first slot selects rows, the second selects columns.
a = np.arange(20).reshape(-1, 5)
print(f"a = {a}\n")

print(f"a[0, 1:3:1] =  {a[0, 1:3:1]}\n") #in the 1st row, access elements from index 1 to 2 (inclusive), step by 1

print(f"a[:, 1:4:1] = {a[:, 1:4:1]}\n")  #In all rows, access elements from index 1 to 3 (inclusive), step by 1

print(f"a[:,:] = {a[:,:]}\n") # access all elements in each row and column

print(f"a[1,:] =  {a[1,:]}\n") # in the 2nd row, access all elements

# same as
print(f"a[1]  {a[1]}")




a = [[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]

a[0, 1:3:1] =  [1 2]

a[:, 1:4:1] = [[ 1  2  3]
 [ 6  7  8]
 [11 12 13]
 [16 17 18]]

a[:,:] = [[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]

a[1,:] =  [5 6 7 8 9]

a[1]  [5 6 7 8 9]
