In [1]:
import numpy as np 
import time

# Vectors

---

### 3.1 Abstract

Vectors are ordered arrays of numbers. 
The elements of a vector are all of the same type; a vector will not, for example, contain a mixture of both ints and chars.

The number of elements in the array is often referred to as the dimension though mathematicians may prefer rank.



### 3.2 NumPy Arrays

NumPy's basic data structure is an indexable, n-dimensional array containing elements of the same type (dtype). 
Here, dimension refers to the number of indexes of an array. A one-dimensional or 1-D array has one index. 
- 1-D array, shape (n,): n elements indexed [0] through [n-1]



### 3.3 Vector Creation

Data creation routines in NumPy will generally have a first parameter which is the shape of the object. This can either be a single value for a 1-D result or a tuple (n,m,...) specifying the shape of the result.

In [4]:
# Creation routines whose first parameter is the shape of the result.
# A bare int and a 1-tuple both produce a 1-D array.
a = np.zeros(4)
print(f"np.zeros(4) : a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

a = np.zeros((4,))
print(f"np.zeros(4,) : a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

# Same shape argument, but the array is filled with random floats.
a = np.random.random_sample(4)
print(f"np.random.random_sample(4): a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

np.zeros(4) : a = [0. 0. 0. 0.], a shape = (4,), a data type = float64
np.zeros(4,) : a = [0. 0. 0. 0.], a shape = (4,), a data type = float64
np.random.random_sample(4): a = [0.85622043 0.09598271 0.16953253 0.16234753], a shape = (4,), a data type = float64


In [10]:
# Creation routines that fill the array but are given their size directly
# rather than through a shape argument.
a = np.arange(4.)
print(f"np.arange(4.):     a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

a = np.random.rand(4)
print(f"np.random.rand(4): a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

np.arange(4.):     a = [0. 1. 2. 3.], a shape = (4,), a data type = float64
np.random.rand(4): a = [0.55277904 0.04516748 0.46433628 0.20329917], a shape = (4,), a data type = float64


In [11]:
# np.array builds the array from the values supplied by the caller.
# All-integer input gives an integer array ...
a = np.array([5, 4, 3, 2])
print(f"np.array([5,4,3,2]):  a = {a},     a shape = {a.shape}, a data type = {a.dtype}")

# ... while a single float literal makes the whole array float64.
a = np.array([5., 4, 3, 2])
print(f"np.array([5.,4,3,2]): a = {a}, a shape = {a.shape}, a data type = {a.dtype}")

np.array([5,4,3,2]):  a = [5 4 3 2],     a shape = (4,), a data type = int64
np.array([5.,4,3,2]): a = [5. 4. 3. 2.], a shape = (4,), a data type = float64


### 3.4 Operations on Vectors

##### 3.4.1 Indexing

Elements of vectors can be accessed via indexing and slicing.


In [15]:
# Vector indexing operations on 1-D vectors
a = np.arange(10)
print(a)

# A single integer index selects one element, which comes back as a scalar
# (its shape is the empty tuple).
print(f"a[2].shape: {a[2].shape} a[2]= {a[2]}, Accessing an element returns a scalar")

# Negative indexes count back from the end, so -1 is the last element.
print(f"a[-1] = {a[-1]}")

# Indexes must be within the range of the vector or they will produce an error.
# Catch only IndexError so that any unrelated failure surfaces normally instead
# of being printed as if it were the expected out-of-bounds message.
try:
    c = a[10]
except IndexError as e:
    print("The error message you'll see is:")
    print(e)

[0 1 2 3 4 5 6 7 8 9]
a[2].shape: () a[2]= 2, Accessing an element returns a scalar
a[-1] = 9
The error message you'll see is:
index 10 is out of bounds for axis 0 with size 10


##### 3.4.2 Slicing

Slicing creates an array of indices using a set of three values `(start:stop:step)`. A subset of values is also valid.

In [None]:
# Slicing a 1-D vector with start:stop:step
a = np.arange(10)
print(f"a      = {a}")

# start=2, stop=7 (exclusive), step=1 -> five consecutive elements
c = a[2:7:1]
print("a[2:7:1] = ", c)

a      = [0 1 2 3 4 5 6 7 8 9]
a[2:7:1] =  [2 3 4 5 6]


##### 3.4.3 Single Vector Operations

There are a number of useful operations that involve operations on a single vector.

In [22]:
a = np.array([1, 2, 3, 4])
print(f"a : {a}")

# unary minus flips the sign of every element
b = -a
print(f"b = -a : {b}")

# np.sum adds up all the elements and returns a scalar
b = np.sum(a)
print(f"b = np.sum(a) : {b}")

# np.mean averages the elements
b = np.mean(a)
print(f"b = np.mean(a): {b}")

# ** 2 squares each element individually
b = a**2
print(f"b = a**2 : {b}")

a : [1 2 3 4]
b = -a : [-1 -2 -3 -4]
b = np.sum(a) : 10
b = np.mean(a): 2.5
b = a**2 : [ 1  4  9 16]


##### 3.4.4 Vector Vector element-wise operations

Most of the NumPy arithmetic, logical and comparison operations apply to vectors as well. These operators work on an element-by-element basis. For example:

$c_i = a_i + b_i$

In [23]:
# Two vectors of equal length; + combines them index by index
a = np.array([1, 2, 3, 4])
b = np.array([-1, -2, 3, 4])
print(f"Binary operators work element wise: {a + b}")

Binary operators work element wise: [0 0 6 8]


In [24]:
# try a mismatched vector operation
# Both operands are defined here so the cell does not depend on `a` being
# left over from an earlier cell.
a = np.array([1, 2, 3, 4])
c = np.array([1, 2])
try:
    d = a + c
except ValueError as e:
    # Only the shape-mismatch ValueError is expected. A blanket
    # `except Exception` would also swallow unrelated failures and present
    # them as if they were the message being demonstrated.
    print("The error message you'll see is:")
    print(e)

The error message you'll see is:
operands could not be broadcast together with shapes (4,) (2,) 


##### 3.4.5 Scalar Vector Operations

Vectors can be 'scaled' by scalar values. A scalar is just a number. The scalar multiplies all the elements of the vector

In [25]:
a = np.array([1, 2, 3, 4])

# the scalar 5 multiplies every element of the vector
b = 5 * a
print(f"b = 5 * a : {b}")

b = 5 * a : [ 5 10 15 20]


##### 3.4.6 Vector Vector dot product

The dot product is a mainstay of Linear Algebra and NumPy. This is an operation used extensively in this course and should be well understood.

$a \cdot b = \sum_{i=0}^{n-1}a_ib_i$

For example, with $n = 3$: $a \cdot b = a_0b_0 + a_1b_1 + a_2b_2 = c$, where $c$ is a scalar.

In [26]:
def my_dot(a, b):
    """
    Compute the dot product of two vectors with an explicit Python loop.

    The loop is intentional: this is the non-vectorized baseline that the
    notebook times against np.dot.

    Args:
      a (ndarray (n,)):  input vector
      b (ndarray (n,)):  input vector with same dimension as a

    Returns:
      x (scalar): sum of a[i] * b[i] over all n elements (0 when n is 0)

    Raises:
      ValueError: if a and b do not have the same number of elements
    """
    n = a.shape[0]

    # Reject mismatched lengths up front. Without this check a longer b would
    # have its extra elements silently ignored, and a shorter b would fail
    # part-way through the loop with an IndexError.
    if len(b) != n:
        raise ValueError(
            f"a and b must have the same number of elements, got {n} and {len(b)}"
        )

    x = 0
    for i in range(n):
        x = x + a[i] * b[i]
    return x


In [27]:
# 1-D check: np.dot on the same two vectors, in both operand orders
a = np.array([1, 2, 3, 4])
b = np.array([-1, 4, 3, 2])

c = np.dot(a, b)
print(f"NumPy 1-D np.dot(a, b) = {c}, np.dot(a, b).shape = {c.shape}")

# operands swapped; note the shape label in this message still reads
# "np.dot(a, b)" although the value shown belongs to np.dot(b, a)
c = np.dot(b, a)
print(f"NumPy 1-D np.dot(b, a) = {c}, np.dot(a, b).shape = {c.shape} ")

NumPy 1-D np.dot(a, b) = 24, np.dot(a, b).shape = ()
NumPy 1-D np.dot(b, a) = 24, np.dot(a, b).shape = () 


##### 3.4.7 The Need for Speed: vector vs for loop

We utilized the NumPy library because it improves speed and memory efficiency. Let's demonstrate:

In [None]:
np.random.seed(1)  # fixed seed so the printed dot product is reproducible
a = np.random.rand(10000000)  # very large arrays
b = np.random.rand(10000000)

# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring short durations, so the
# interval cannot be distorted by system clock adjustments.
tic = time.perf_counter()  # capture start time
c = np.dot(a, b)
toc = time.perf_counter()  # capture end time

print(f"np.dot(a, b) =  {c:.4f}")
print(f"Vectorized version duration: {1000*(toc-tic):.4f} ms ")

tic = time.perf_counter()  # capture start time
c = my_dot(a, b)
toc = time.perf_counter()  # capture end time

print(f"my_dot(a, b) =  {c:.4f}")
print(f"loop version duration: {1000*(toc-tic):.4f} ms ")

del a, b  # remove these big arrays from memory

np.dot(a, b) =  2501072.5817
Vectorized version duration: 39.3169 ms 
my_dot(a, b) =  2501072.5817
loop version duration: 2451.6792 ms 


NumPy makes better use of available data parallelism in the underlying hardware. GPUs and modern CPUs implement Single Instruction, Multiple Data (SIMD) pipelines allowing multiple operations to be issued in parallel. This is critical in Machine Learning where the data sets are often very large.

##### 3.4.8 Vector Vector operations in Course 1

Vector Vector operations will appear frequently in course 1

- Going forward, our examples will be stored in an array, X_train of dimension (m,n). This will be explained more in context, but here it is important to note it is a 2 Dimensional array or matrix
- `w` will be a 1-dimensional vector of shape (n,)
- We will perform operations by looping through the examples, extracting each example to work on individually by indexing X. For example: X[i]
- X[i] returns a value of shape (n,), a 1-dimensional vector. Consequently, operations involving X[i] are often vector-vector 

In [31]:
# Typical Course 1 pattern: index one example out of X, then dot it with w
X = np.array([[1], [2], [3], [4]])
w = np.array([2])

c = np.dot(X[1], w)

print(f"X[1] has shape {X[1].shape}")
print(f"w has shape {w.shape}")
print(f"c has shape {c.shape}")

X[1] has shape (1,)
w has shape (1,)
c has shape ()


### 4 Matrices

##### 4.1 Abstract

Matrices are two dimensional arrays. The elements of a matrix are all of the same type. `m` is often the number of rows and `n` the number of columns.

##### 4.2 NumPy Arrays

NumPy's basic data structure is an indexable, n-dimensional array containing elements of the same type(`dtype`). These were described earlier. Matrices have a two-dimensional(2-D) index [m, n].

In Course 1, 2-D matrices are used to hold training data. Training data is m examples by n features, creating an (m,n) array. Course 1 does not do operations directly on matrices but typically extracts an example as a vector and operates on that.


### 4.3 Matrix Creation

The same functions that created 1-D vectors will create 2-D or n-D arrays.

In [34]:
# A two-element shape tuple (rows, columns) gives a 2-D result.
# one row, five columns
a = np.zeros((1, 5))
print(f"a shape = {a.shape}, a = {a}")

# two rows, one column
a = np.zeros((2, 1))
print(f"a shape = {a.shape}, a = {a}")

# a single random value held in a 1x1 matrix
a = np.random.random_sample((1, 1))
print(f"a shape = {a.shape}, a = {a}")

a shape = (1, 5), a = [[0. 0. 0. 0. 0.]]
a shape = (2, 1), a = [[0.]
 [0.]]
a shape = (1, 1), a = [[0.44236513]]


In [35]:
# Build 2-D arrays from explicit values: each inner list becomes one row
a = np.array([[5], [4], [3]])
print(f" a shape = {a.shape}, np.array: a = {a}")

# The same literal again, laid out with one row per source line
a = np.array(
    [
        [5],
        [4],
        [3],
    ]
)
print(f" a shape = {a.shape}, np.array: a = {a}")

 a shape = (3, 1), np.array: a = [[5]
 [4]
 [3]]
 a shape = (3, 1), np.array: a = [[5]
 [4]
 [3]]


### 4.4 Operations on Matrices

##### 4.4.1 Indexing

Matrices include a second index. The two indexes describe [row, column].

In [38]:
# Indexing a 2-D array
# Six consecutive values folded into rows of two columns; -1 lets reshape
# work out the row count.
a = np.arange(6).reshape(-1, 2)
print(f"a.shape: {a.shape}, \na = {a}")

# [row, column] selects a single element
print(f"\na[2,0].shape:  {a[2, 0].shape}, a[2, 0] = {a[2, 0]}, type(a[2,0]) = {type(a[2, 0])} Accessing an element returns a scalar\n")

# a single index selects an entire row
print(f"a[2].shape:   {a[2].shape}, a[2]   = {a[2]}, type(a[2])   = {type(a[2])}")

a.shape: (3, 2), 
a = [[0 1]
 [2 3]
 [4 5]]

a[2,0].shape:  (), a[2, 0] = 4, type(a[2,0]) = <class 'numpy.int64'> Accessing an element returns a scalar

a[2].shape:   (2,), a[2]   = [4 5], type(a[2])   = <class 'numpy.ndarray'>


##### Reshape

`a = np.arange(6).reshape(-1, 2)`
This line of code first created a 1-D Vector of six elements. It then reshaped that vector into a 2-D array using the reshape command. This could have been written:
`a = np.arange(6).reshape(3, 2)`
This arrives at the same 3-row, 2-column array. The `-1` argument tells the routine to compute the number of rows given the size of the array and the number of columns.


##### 4.4.2 Slicing



In [39]:
# Slicing a 2-D array
a = np.arange(20).reshape(-1, 10)
print(f"a = \n{a}")

# five consecutive elements (start:stop:step) taken from row 0
print(
    "a[0, 2:7:1] = ", a[0, 2:7:1],
    ",  a[0, 2:7:1].shape =", a[0, 2:7:1].shape,
    "a 1-D array",
)

# the same five columns taken from every row
print(
    "a[:, 2:7:1] = \n", a[:, 2:7:1],
    ",  a[:, 2:7:1].shape =", a[:, 2:7:1].shape,
    "a 2-D array",
)

# a bare colon on both axes selects everything
print(
    "a[:,:] = \n", a[:,:],
    ",  a[:,:].shape =", a[:,:].shape,
)

# every element of one row (very common usage)
print(
    "a[1,:] = ", a[1,:],
    ",  a[1,:].shape =", a[1,:].shape,
    "a 1-D array",
)
# leaving out the column index gives the same row
print(
    "a[1]   = ", a[1],
    ",  a[1].shape   =", a[1].shape,
    "a 1-D array",
)

a = 
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]
a[0, 2:7:1] =  [2 3 4 5 6] ,  a[0, 2:7:1].shape = (5,) a 1-D array
a[:, 2:7:1] = 
 [[ 2  3  4  5  6]
 [12 13 14 15 16]] ,  a[:, 2:7:1].shape = (2, 5) a 2-D array
a[:,:] = 
 [[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]] ,  a[:,:].shape = (2, 10)
a[1,:] =  [10 11 12 13 14 15 16 17 18 19] ,  a[1,:].shape = (10,) a 1-D array
a[1]   =  [10 11 12 13 14 15 16 17 18 19] ,  a[1].shape   = (10,) a 1-D array
