In [1]:
# Numpy as typically faster than python code because it's algorithms are written in C
import numpy as np

my_arr = np.arange(1_000_000)

my_list = list(range(1_000_000))

In [2]:
%timeit my_arr2 = my_arr * 2

8.88 ms ± 3.06 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [3]:
%timeit my_list2 = [x * 2 for x in my_list]

70.5 ms ± 8.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [4]:
# one of the key things we can do with numpy as word with nd arrays
data = np.array([[1.5, -0.1, 3], [0, -3, 6.5]])
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [5]:
# we can do element-wise operations with arrays 
data * 10

array([[ 15.,  -1.,  30.],
       [  0., -30.,  65.]])

In [6]:
data + data

array([[ 3. , -0.2,  6. ],
       [ 0. , -6. , 13. ]])

In [7]:
# we can also get their attributes
print(data.shape)
print(data.dtype)

(2, 3)
float64


In [8]:
# creating nd arrays
data1 = [6, 7.5, 8, 0, 1] # list
arr1 = np.array(data1) # make an array based on a list/vector
arr1


array([6. , 7.5, 8. , 0. , 1. ])

In [9]:
# 2d array
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [10]:
# we can check their ndim and shape attributes
print(arr2.ndim)
print(arr2.shape)

2
(2, 4)


In [11]:
# we can infer the data type of each array
print(arr1.dtype)
print(arr2.dtype)

float64
int64


In [12]:
# there are other functions we can use with numpy
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [13]:
np.zeros((3,6)) # we pass in a tuple to specify dimensions

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [14]:
np.empty((2, 3, 2)) # don't assume this will return zeros

array([[[6.04482112e-312, 3.16202013e-322],
        [0.00000000e+000, 0.00000000e+000],
        [1.43761413e+161, 3.40909707e-057]],

       [[7.79350804e-071, 2.58080934e-057],
        [1.15345219e-071, 6.75392471e-067],
        [1.05186924e-046, 1.83059760e-076]]])

In [15]:
# arange is an array valued version of Python's built in range function
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [16]:
# we can cast an array so change the data type
arr = np.array([1, 2, 3, 4, 5])
arr.dtype

dtype('int64')

In [17]:
float_arr = arr.astype(np.float64)
float_arr, float_arr.dtype

(array([1., 2., 3., 4., 5.]), dtype('float64'))

In [18]:
# if I cast floating point numbers to integers, decimal will be truncated
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [19]:
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [20]:
# you can also use another array's dtype attribute
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype)


array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [21]:
# there are shorthand type code strings
zeros_uint32 = np.zeros(8, dtype='u4')
zeros_uint32

array([0, 0, 0, 0, 0, 0, 0, 0], dtype=uint32)

In [22]:
# arithmetic operations with numpy arrays
arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [23]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [24]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [25]:
# arithmetic operations with scalars propagate scalar argument to each element in array
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [26]:
arr ** 2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [27]:
# comparison between arrays of same size yield Boolean arrays
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [28]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

In [29]:
# basic indexing and slicing
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]:
arr[5]

np.int64(5)

In [31]:
arr[5:8]

array([5, 6, 7])

In [32]:
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [33]:
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [34]:
# this is reference semantics in play where arr, arr_slice are referencing the same array
# so when we change something through arr_slice, arr also changes cause it's the same thing
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [35]:
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [36]:
# this indexing also works for 2D arrays
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]

array([7, 8, 9])

In [37]:
# we can also access individual elements in two ways
arr2d[0][2], arr2d[0, 2]

(np.int64(3), np.int64(3))

In [47]:
# multidimensional arrays
arr3d = np.array([[[1,2,3], [4,5,6]], [[7,8,9], [10,11,12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [48]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [49]:
old_values = arr3d[0].copy()
arr3d[0] = 42
arr3d


array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [50]:
arr3d[0] = old_values
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [56]:
# EXERCISES
# 1. Array Creation
# create a numpy array containing numbers from 1 to 20
arr = np.arange(1, 21)

# reshape it into a 4 x 5 matrix
arr = arr.reshape((4,5))

# print the second row
print(arr[1])

# print the last column
print(arr[:,-1])

[ 6  7  8  9 10]
[ 5 10 15 20]


In [65]:
# 2. vectorized computation
# make an array of 100 equally spaced numbers between 0 and 2 pi
arr = np.linspace(0, np.pi * 2, 100)

# compute sin(x) and cos(x) for all elements
sin = np.sin(arr)
cos = np.cos(arr)

# verify pythagorean identity: sin^2(x) + cos^2(x) = 1
(sin ** 2) + (cos ** 2)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [None]:
# 3. Boolean Masking
# Generate an array of 20 random integers between 1 and 100
arr = np.random.randint(1, 101, size=20)

# extract only numbers divisible by 3
div_by_3 = arr[arr % 3 == 0]

# replace all numbers greater than 80 with -1
arr[arr > 80] = -1

[-1 -1 63 30 48 -1 52 27 17 21 -1 11 63 70 65 -1 -1 42 37 67]


In [80]:
# 4. Aggregation
# create a 5 x 5 matrix of random integers between 10 and 50
arr = np.random.randint(10, 51, size=25).reshape((5,5))

# find mean of each row
row_means = arr.mean(axis=1)

# find the max of each column
col_max = arr.max(axis=0)

In [None]:
# 5. Fancy Indexing
arr = np.arange(10, 110, 10)

# use fancy indexing to get elements at positions 0,3,5,7
selected = arr[[0,3,5,7]]

# reverse the array using slicing
reversed_arr = arr[::-1]

array([100,  90,  80,  70,  60,  50,  40,  30,  20,  10])

In [93]:
# 6. Broadcasting
# create a 3x3 matrix filled with 1's
arr = np.ones(9).reshape((3,3))

# add the vector [1,2,3] to every row
arr + np.array([1,2,3])

# multiply each column by [1,2,3]
arr * np.array([1,2,3]).reshape((1,3))

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [101]:
# 7. Linear Algebra
# Make two 3 x 3 matrices A and B (random integers from 1 to 10)
A = np.random.randint(1,10, size=9).reshape((3,3))
B = np.random.randint(1,10, size=9).reshape((3,3))

# compute matrix product of A and B
np.dot(A, B)

# compute transpose of A
A.T

# Solve system Ax = b where b is a vector of 3 random integers
b = np.random.randint(1,10, size=3)
x = np.linalg.solve(A,b)
x

array([-1. ,  2.5, -0.5])