# Numpy for Machine Learning

## Set up

In [1]:
import numpy as np

# Set seed for reproducibility
np.random.seed(seed=1234)

## Basics

In [2]:
# Scalar
x = np.array(6)
print("x: ", x)
print("x ndim: ", x.ndim)
print("x shape: ", x.shape)
print("x size: ", x.size)
print("x dtype: ", x.dtype)

x:  6
x ndim:  0
x shape:  ()
x size:  1
x dtype:  int32


In [3]:
# Vector
x = np.array([1.3, 2.2, 1.7])
print("x: ", x)
print("x ndim: ", x.ndim)
print("x shape: ", x.shape)
print("x size: ", x.size)
print("x dtype: ", x.dtype)

x:  [1.3 2.2 1.7]
x ndim:  1
x shape:  (3,)
x size:  3
x dtype:  float64


In [4]:
# Matrix 
x = np.array([[1,2], [3,4]])
print("x: ", x)
print("x ndim: ", x.ndim)
print("x shape: ", x.shape)
print("x size: ", x.size)
print("x dtype: ", x.dtype)

x:  [[1 2]
 [3 4]]
x ndim:  2
x shape:  (2, 2)
x size:  4
x dtype:  int32


In [5]:
# 3-D Tensor
x = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print("x: ", x)
print("x ndim: ", x.ndim)
print("x shape: ", x.shape)
print("x size: ", x.size)
print("x dtype: ", x.dtype)

x:  [[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
x ndim:  3
x shape:  (2, 2, 2)
x size:  8
x dtype:  int32


In [6]:
# Functions
print("np.zeros((2,2)):\n", np.zeros((2,2)))
print("np.ones((2,2)):\n", np.ones((2,2)))
# Identity matrix
print("np.eye((2)):\n", np.eye((2)))
print("np.random.random((2,2)):\n", np.random.random((2,2)))

np.zeros((2,2)):
 [[0. 0.]
 [0. 0.]]
np.ones((2,2)):
 [[1. 1.]
 [1. 1.]]
np.eye((2)):
 [[1. 0.]
 [0. 1.]]
np.random.random((2,2)):
 [[0.19151945 0.62210877]
 [0.43772774 0.78535858]]


## Indexing

In [7]:
# Indexing
x = np.array([1,2,3])
print("x: ", x)
print("x[0]: ", x[0])
x[0] = 0
print("x: ", x)

x:  [1 2 3]
x[0]:  1
x:  [0 2 3]


In [8]:
# Slicing
x = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print(x)
print("x column 1: ", x[:, 1])
print("x row 0: ", x[0, :])
print("x rows 0,1 & cols 1,2: \n", x[0:2, 1:3])

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
x column 1:  [ 2  6 10]
x row 0:  [1 2 3 4]
x rows 0,1 & cols 1,2: 
 [[2 3]
 [6 7]]


In [9]:
# Integer array indexing
print(x)
rows_to_get = np.array([0, 1, 2])
print("rows_to_get: ", rows_to_get)
cols_to_get = np.array([0, 2, 1])
print("cols_to_get: ", cols_to_get)
# Combine above sequences to get values
print("indexed values: ", x[rows_to_get, cols_to_get])


[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
rows_to_get:  [0 1 2]
cols_to_get:  [0 2 1]
indexed values:  [ 1  7 10]


In [10]:
# Boolean array indexing
x = np.array([[1, 2], [3, 4], [5, 6]])
print("x:\n", x)
print("x > 2:\n", x > 2)
print("x[x > 2]:\n", x[x>2])

x:
 [[1 2]
 [3 4]
 [5 6]]
x > 2:
 [[False False]
 [ True  True]
 [ True  True]]
x[x > 2]:
 [3 4 5 6]


## Arithmetic

In [11]:
# Basic math
x = np.array([[1, 2], [3, 4]], dtype=np.float64)
y = np.array([[1, 2], [3, 4]], dtype=np.float64)
print("x + y:\n", np.add(x, y)) # or x + y
print("x - y:\n", np.subtract(x, y)) # or x - y
print("x * y:\n", np.multiply(x, y)) # or x * y

x + y:
 [[2. 4.]
 [6. 8.]]
x - y:
 [[0. 0.]
 [0. 0.]]
x * y:
 [[ 1.  4.]
 [ 9. 16.]]


## Dot product

One of the most common NumPy operations is matrix multiplication using the dot product.

In [12]:
# Dot product
a = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
b = np.array([[7,8], [9, 10], [11,12]], dtype=np.float64)
c = a.dot(b)
print(f"{a.shape} . {b.shape} = {c.shape}")
print(c)

(2, 3) . (3, 2) = (2, 2)
[[ 58.  64.]
 [139. 154.]]


## Axis operations
We can also do operations across a specific axis

In [13]:
# Sum across a dimension
x = np.array([[1, 2], [3, 4]])
print(x)
print("sum all: ", np.sum(x)) # adds all elements
print("sum axis=0: ", np.sum(x, axis=0)) # sum across rows
print("sum axis=1: ", np.sum(x, axis=1)) # sum across cols

[[1 2]
 [3 4]]
sum all:  10
sum axis=0:  [4 6]
sum axis=1:  [3 7]


In [14]:
# Min/max
x = np.array([[1, 2, 3], [4, 5, 6]])
print("min: ", x.min())
print("max: ", x.max())
print("min axis=0: ", x.min(axis=0))
print("min axis=1: ", x.min(axis=1))
z

min:  1
max:  6
min axis=0:  [1 2 3]
min axis=1:  [1 4]


NameError: name 'z' is not defined

## Broadcast

In [None]:
# Broadcasting
x = np.array([1,2]) # vector
y = np.array(3) # scalar
z = x + y
print("z:\n", z)

## Gotchas

In [None]:
# What is the value of c and what are its dimensions?
a = np.array((3, 4, 5))
b = np.expand_dims(a, axis=1)
c = a + b
a.shape
b.shape
c.shape
print(c)

In [None]:
# How do we fix this? We need to be careful to ensure that a is the same shape
# as b if we don't want this unintentional broadcasting behavior
a = a.reshape(-1, 1)
a.shape # (3, 1)
c = a + b
c.shape # (3, 1)
print(c)

In [None]:
# Apply proper reshaping before using it for any operations
a = np.array([3, 4, 5])
a.shape # (3, )
a = a.reshape(-1, 1)
a.shape # (3, 1)

## Transpose

In [None]:
# Transposing
x = np.array([[1, 2, 3], [4, 5, 6]])
print("x:\n", x)
print("x.shape: ", x.shape)
y = np.transpose(x, (1, 0)) # flip dimensions at index 0 and 1
print("y:\n", y)
print("y.shape: ", y.shape)

## Reshape
Sometimes we'll need to alter dimensions of the matrix. Reshaping allows us to transform a tensor into different permissible shapes.

In [None]:
# Reshaping
x = np.array([[1, 2, 3, 4, 5, 6]])
print(x)
print("x.shape: ", x.shape)
y = np.reshape(x, (2, 3))
print("y: \n", y)
print("y.shape: ", y.shape)
z = np.reshape(x, (2, -1))
print("z: \n", z)
print("z.shape: ", z.shape)

## Joining

In [None]:
x = np.random.random((2, 3))
print(x)
print(x.shape)

In [None]:
# Concatenation
y = np.concatenate([x, x], axis=0) # Concat on a specified axis
print(y)
print(y.shape)

# Stacking
z = np.stack([x, x], axis=0) # Stack on new axis
print(z)
print(z.shape)

## Expanding/reducing

In [None]:
# Adding dimensions
x = np.array([[1, 2, 3], [4, 5, 6]])
print("x: \n", x)
print("x.shape: ", x.shape)
y = np.expand_dims(x, 1) # expand dim 1
print("y: \n", y)
print("y.shape: ", y.shape) # notice extra set of brackets