In [2]:
import numpy as np


# 1. Vectorization vs For loop

In [3]:
import time

# Build two 1-D arrays of 1,000,000 random samples each (uniform on [0, 1)).
a = np.random.rand(1000000)
b = np.random.rand(1000000)

# Time the vectorized dot product.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals. time.time() is wall-clock time: its resolution can be too
# coarse for an operation this fast, and it can jump if the system clock is
# adjusted mid-measurement.
tic = time.perf_counter()
c = np.dot(a, b)
toc = time.perf_counter()
vectorized_time = 1000 * (toc - tic)  # seconds -> milliseconds
print("c = ", c)
print("Vectorized version: " + str(vectorized_time) + " ms")

c =  249854.66804390395
Vectorized version: 77.46696472167969 ms


In [4]:
# Compute the same dot product with an explicit Python loop to compare timings.
c = 0

# perf_counter() is a monotonic, high-resolution clock meant for interval timing.
tic = time.perf_counter()
for i in range(len(a)):  # take the length from the data instead of hard-coding it
    c += a[i] * b[i]
toc = time.perf_counter()
forloop_time = 1000 * (toc - tic)  # seconds -> milliseconds
print("c = ", c)
print("For loop: " + str(forloop_time) + " ms")

# Guard the ratio: if the vectorized run was measured as 0 ms (possible with a
# coarse clock), a plain division would raise ZeroDivisionError.
speedup = forloop_time / vectorized_time if vectorized_time > 0 else float("inf")
print("forloop/vectorized = ", speedup, "times")



c =  249854.66804390468
For loop: 480.2970886230469 ms
forloop/vectorized =  6.200024621445279 times


# 2. Broadcasting

In [5]:
# Fixed 3x4 example matrix used by the broadcasting cells that follow.
A = np.array([
    [56.0,   0.0,  4.4, 68.0],
    [ 1.2, 104.0, 52.0,  8.0],
    [ 1.8, 135.0, 99.0,  0.9],
])

print(A)

[[ 56.    0.    4.4  68. ]
 [  1.2 104.   52.    8. ]
 [  1.8 135.   99.    0.9]]


In [6]:
# Sum down the rows (axis 0): cal[j] is the total of column j of A.
cal = np.sum(A, axis=0)
print(cal)

[ 59.  239.  155.4  76.9]


In [7]:
# Show cal laid out as an explicit 1x4 row. The result is only displayed,
# not assigned, so `cal` itself keeps its original shape.
cal.reshape(1,4)

array([[ 59. , 239. , 155.4,  76.9]])

In [8]:
# Express every entry of A as a percentage of its column total.
# A is 3x4 and cal holds the 4 column sums, so the division is broadcast
# across the rows of A.
percentage = 100 * A / cal
print(percentage)

[[94.91525424  0.          2.83140283 88.42652796]
 [ 2.03389831 43.51464435 33.46203346 10.40312094]
 [ 3.05084746 56.48535565 63.70656371  1.17035111]]


## 2.1 Broadcasting general rule:
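(m, n) matrix +, -, *, / (1, n) matrix -> (m, n) matrix (element-wise; the row is repeated for each of the m rows)  
(m, n) matrix +, -, *, / (m, 1) matrix -> (m, n) matrix (element-wise; the column is repeated for each of the n columns)  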

In [9]:
# A (2, 3) matrix plus a (1, 3) row of ones: the row is added to each row of x.
x = np.array([[1, 2, 3], [2, 3, 4]])
y = np.ones((1, 3), dtype=int)
x + y

array([[2, 3, 4],
       [3, 4, 5]])

## 2.2 Avoiding broadcasting bugs

In [10]:
# Passing a single length creates a rank-1 array of shape (5,), which is
# neither a row vector nor a column vector -- bad practice, shown on purpose.
a = np.random.standard_normal(5)
print(a)

[ 0.3652757   0.92097252 -0.76283045  1.66948821 -0.73488654]


In [11]:
# A rank-1 array has a one-element shape tuple: there is no separate
# row or column dimension.
print(a.shape)

(5,)


In [12]:
# Transposing a rank-1 array has no visible effect: the printed values match `a`.
print(a.T)

[ 0.3652757   0.92097252 -0.76283045  1.66948821 -0.73488654]


In [13]:
# With a rank-1 array, dot(a, a.T) is the inner product: it prints a single
# number, not a 5x5 matrix.
print(np.dot(a, a.T))

4.890776132560658


In [14]:
# For a rank-1 array, a and a.T are the same, so the transpose is useless.
# Avoid that form; give the array an explicit 2-D shape instead.
b = np.random.standard_normal((5, 1))  # b is a 5x1 column vector
assert b.shape == (5, 1)  # silent when true; raises AssertionError otherwise

In [15]:
# b is a 5x1 column, so its transpose b.T is a genuine 1x5 row.
print("b is:", b)
print("b' is:", b.T)

b is: [[-0.1511296 ]
 [ 1.02610215]
 [-1.46235364]
 [ 1.18484246]
 [-0.88318865]]
b' is: [[-0.1511296   1.02610215 -1.46235364  1.18484246 -0.88318865]]


In [16]:
# (5, 1) times (1, 5): the product is the 5x5 outer product of b with itself.
print(np.dot(b, b.T))

[[ 0.02284016 -0.15507441  0.22100492 -0.17906477  0.13347595]
 [-0.15507441  1.05288563 -1.50052421  1.2157694  -0.90624178]
 [ 0.22100492 -1.50052421  2.13847816 -1.73265867  1.29153414]
 [-0.17906477  1.2157694  -1.73265867  1.40385165 -1.04643941]
 [ 0.13347595 -0.90624178  1.29153414 -1.04643941  0.7800222 ]]


In [17]:
# Display b's five values as a 1x5 row. The result is not assigned,
# so b itself is unchanged.
b.reshape(1, 5)

array([[-0.1511296 ,  1.02610215, -1.46235364,  1.18484246, -0.88318865]])

In [20]:
# Add a (2, 1) column to a (2, 3) matrix; the sum has shape (2, 3).
a = np.random.standard_normal((2, 3))
b = np.random.standard_normal((2, 1))
c = a + b
c


array([[-1.5914963 , -0.1450986 , -0.44239164],
       [ 0.91791219,  0.13068978,  1.51130387]])

In [22]:
# Matrix product of a (12288, 150) array with a (150, 45) array:
# the inner dimensions (150) match, and the result is (12288, 45).
a = np.random.standard_normal((12288, 150))
b = np.random.standard_normal((150, 45))
c = a.dot(b)
c.shape

(12288, 45)

In [24]:
# `*` is element-wise, not a matrix product: the (3, 1) column b is broadcast
# across the columns of the (3, 3) matrix a, so row i of a is scaled by b[i].
a = np.random.standard_normal((3, 3))
b = np.random.standard_normal((3, 1))
c = a * b
c

array([[ 0.34774996, -3.69359511, -1.01186991],
       [-1.24770507,  0.27393827, -0.6618045 ],
       [-1.19553643, -0.88304163,  0.43509285]])