<b>Import dependencies</b>

In [67]:
# import dependencies

import numpy as np
import math
import random
import time 

<b>Hello world test</b>

In [36]:
# As always, the "hello world" test: print a fixed string to confirm the cell runs

print('Hello World')

Hello World


<b>Sigmoid function</b>

In [11]:
# sigmoid function for scalar

def scalar_sigmoid(z):
    '''
    Compute sigmoid of z.

    The value is computed from exp(-|z|), which always lies in (0, 1], so
    large-magnitude negative inputs no longer overflow math.exp.
    
    Args:
    z -- a scalar
    
    return:
    fx -- sigmoid(z)

    raises:
    TypeError -- if z is not something math.exp can treat as a single
                 number (e.g. a multi-element numpy array)
    '''
    # exp(-|z|) can underflow to 0.0 but can never overflow.
    exp_neg_abs = math.exp(-abs(z))

    if z >= 0:
        # Here exp(-|z|) == exp(-z): the textbook form 1 / (1 + e^-z).
        fx = 1 / (1 + exp_neg_abs)
    else:
        # Here exp(-|z|) == exp(z): the equivalent form e^z / (1 + e^z).
        fx = exp_neg_abs / (1 + exp_neg_abs)

    return fx

In [18]:
# math sigmoid works well for scalar
print(f'sigmoid of 1: {scalar_sigmoid(1)}')

# but it can't deal with vectors.
# The failure is the point of this demo, so catch the TypeError and print it
# instead of letting the exception stop a "Restart & Run All".
v = np.array([1, 2, 3])
try:
    print(f'sigmoid of array: {scalar_sigmoid(v)}')
except TypeError as err:
    print(f'TypeError: {err}')

sigmoid of 1: 0.7310585786300049


TypeError: only size-1 arrays can be converted to Python scalars

In [19]:
# sigmoid function for vector

def sigmoid(z):
    '''
    Element-wise logistic sigmoid, 1 / (1 + exp(-z)), built on numpy.exp.
    
    Args:
    z -- a vector (numpy array); the negation and exponential are applied
         to every element
    
    return:
    sigmoid(z), one value per element of z
    '''
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [20]:
# numpy.exp-based sigmoid can deal with vectors.
# The same three-element input that failed with the math-based version is
# passed to the numpy-based sigmoid here.

v = np.array([1, 2, 3])
print(f'sigmoid of array: {sigmoid(v)}')

sigmoid of array: [0.73105858 0.88079708 0.95257413]


In [21]:
# sigmoid derivative

def sigmoid_derivative(x):
    '''
    Compute the derivative or gradient of sigmoid function,
    sigmoid(x) * (1 - sigmoid(x)).
    
    Args:
    x -- a scalar or numpy array
    
    return:
    gradient -- the gradient of input
    '''
    # Evaluate sigmoid once and reuse it; the original computed it twice.
    s = sigmoid(x)
    gradient = s * (1 - s)
    
    return gradient

In [22]:
# sigmoid derivative.
# Evaluate the gradient on a small example vector and print the result.

v = np.array([1, 2, 3])
print(f'gradient of sigmoid: {sigmoid_derivative(v)}')

gradient of sigmoid: [0.19661193 0.10499359 0.04517666]


<b>reshape array</b>

In [34]:
# reshape arrays

def image2vector(image):
    '''
    reshape an image to a column vector
    
    Args:
    image -- a numpy array (or array-like) of any dimensionality, e.g. a
             three dimension image array; all elements are kept in their
             existing (row-major) order
    
    return:
    v -- a column vector of shape (image.size, 1)
    '''
    # asarray is a no-op for ndarrays and lets nested lists be accepted too.
    image = np.asarray(image)
    # -1 lets numpy infer the length, so the input need not be exactly 3-D.
    v = image.reshape(-1, 1)
    return v

In [33]:
# Use a random 3x3x3 array as a stand-in image and flatten it with image2vector.
image = np.random.randn(3,3,3)
vector = image2vector(image)

# Print both arrays with their shapes to compare before and after the reshape.
print(f'the image:\n {image} and it has shape: {image.shape} \n')
print(f'the image vector:\n {vector} and it has shape: {vector.shape}')

the image:
 [[[ 0.04045268 -0.01971152 -0.47313115]
  [-1.60336735 -1.93083494  0.63782758]
  [ 0.34380439  0.65142983  0.08093146]]

 [[-1.00027241 -0.08807278  1.09489329]
  [ 0.90130183  0.1696478  -0.02446409]
  [ 1.16356785  1.09527645  0.95970551]]

 [[ 1.47785042 -2.76965464  0.27683257]
  [-0.86655736  0.77620914 -0.98944492]
  [ 0.49573564  0.02096727 -0.13865792]]] and it has shape: (3, 3, 3) 

the image vector:
 [[ 0.04045268]
 [-0.01971152]
 [-0.47313115]
 [-1.60336735]
 [-1.93083494]
 [ 0.63782758]
 [ 0.34380439]
 [ 0.65142983]
 [ 0.08093146]
 [-1.00027241]
 [-0.08807278]
 [ 1.09489329]
 [ 0.90130183]
 [ 0.1696478 ]
 [-0.02446409]
 [ 1.16356785]
 [ 1.09527645]
 [ 0.95970551]
 [ 1.47785042]
 [-2.76965464]
 [ 0.27683257]
 [-0.86655736]
 [ 0.77620914]
 [-0.98944492]
 [ 0.49573564]
 [ 0.02096727]
 [-0.13865792]] and it has shape: (27, 1)


<b>Normalization</b>

In [43]:
def normalize(x, r_o_c):
    '''
    normalizing array by rows or columns (divide by the L2 norm)
    
    Args:
    x -- numpy array needed to be normalized (2-D, since rows use axis=1)
    r_o_c -- string to determine normalize by row or column:
        'r' for row
        'c' for column
    
    return:
    x_normalized -- normalized numpy array, same shape as x

    raises:
    ValueError -- if r_o_c is neither 'r' nor 'c'

    note:
    a row/column whose norm is 0 is divided by 0, so its entries become nan.
    '''
    
    if r_o_c == 'r':
        axis = 1
    elif r_o_c == 'c':
        axis = 0
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(f"r_o_c must be 'r' or 'c', got {r_o_c!r}")

    # keepdims=True keeps the reduced axis so the division broadcasts over x.
    norm = np.linalg.norm(x, ord=2, axis=axis, keepdims=True)
    x_normalized = x / norm
    
    return x_normalized

In [45]:
# test the normalization process
# A random 2x3 array is normalized once per row and once per column.

x = np.random.randn(2,3)
print(f'numpy array needs to be normalized:\n {x}')

x_norm_by_row = normalize(x, 'r')
print(f'normalizing by row:\n {x_norm_by_row}')
x_norm_by_col = normalize(x, 'c')
print(f'normalizing by col:\n {x_norm_by_col}')

numpy array needs to be normalized:
 [[ 0.49082062 -0.15303719 -1.4937998 ]
 [ 0.02253928 -0.40399348  0.15050867]]
normalizing by row:
 [[ 0.3106856  -0.09687134 -0.94556354]
 [ 0.05220958 -0.93580309  0.34863553]]
normalizing by col:
 [[ 0.99894727 -0.354246   -0.99496248]
 [ 0.04587329 -0.93515227  0.10024803]]


<b>Softmax</b>

In [57]:
def softmax(x, r_o_c):
    '''
    compute softmax of x along rows or columns

    The maximum along the chosen axis is subtracted before exponentiating.
    This leaves the result mathematically unchanged but keeps np.exp from
    overflowing to inf (which produced nan) for large inputs.
    
    Args:
    x -- numpy array (2-D, since rows use axis=1)
    r_o_c -- string to determine softmax by row or column:
        'r' for row
        'c' for column
    
    return:
    s_x -- numpy array of the same shape as x; each row ('r') or each
           column ('c') sums to 1

    raises:
    ValueError -- if r_o_c is neither 'r' nor 'c'
    '''
    
    if r_o_c == 'r':
        axis = 1
    elif r_o_c == 'c':
        axis = 0
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(f"r_o_c must be 'r' or 'c', got {r_o_c!r}")

    # Shift so the largest exponent is 0; exp() then stays within (0, 1].
    x_shifted = x - np.max(x, axis=axis, keepdims=True)
    x_exp = np.exp(x_shifted)
    x_sum_exp = np.sum(x_exp, axis=axis, keepdims=True)
    s_x = x_exp / x_sum_exp
    return s_x

In [59]:
# test softmax function
# In practice, softmax should perform on all the features of each training example
# Here a random 2x3 array is passed through softmax once per row and once per column.

x = np.random.randn(2,3)
print(f'numpy array feds to softmax:\n {x}')


s_x_row = softmax(x, 'r')
print(f'softmax by row:\n {s_x_row}')
s_x_col = softmax(x, 'c')
print(f'softmax by col:\n {s_x_col}')

numpy array feds to softmax:
 [[-0.70424196  0.22364216  0.07090757]
 [ 1.31965443 -0.95033193  0.74727981]]
softmax by row:
 [[0.17543648 0.44370554 0.38085798]
 [0.59970097 0.06195726 0.33834177]]
softmax by col:
 [[0.1167167  0.7638626  0.33707146]
 [0.8832833  0.2361374  0.66292854]]


<b>Vectorization</b>

In [103]:
# non-vectorizied version
np.random.seed(1)

# Fixed inputs so the results are reproducible.
# NOTE: these must be assignments (`=`). Written as `x1: [...]` they are bare
# annotations that never bind the name, so a fresh kernel fails with NameError.
x1 = [5, 7, 12, 11, 6, 3, 18, 19, 8, 16]
x2 = [9, 1, 6, 8, 7, 14, 5, 19, 16, 3]

# The inputs can alternatively be drawn at random:
#x1 = random.sample(range(1, 20), 10)
#x2 = random.sample(range(1, 20), 10)

print(f'x1: {x1}')
print(f'x2: {x2}')
print('---------------------------------')

# dot product: accumulate the sum of pairwise products in a Python loop
dot = 0
tic = time.process_time()
for i in range(len(x1)):
    dot += x1[i] * x2[i]
toc = time.process_time()
print(f'dot product: {dot} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

# outer product: outer[i][j] = x1[i] * x2[j]
outer = np.zeros((len(x1), len(x2)))
tic = time.process_time()
for i in range(len(x1)):
    for j in range(len(x2)):
        outer[i][j] = x1[i] * x2[j]
toc = time.process_time()
print(f'outer product:\n {outer} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

# element-wise product
ele_w_p = np.zeros(len(x1))
tic = time.process_time()
for i in range(len(x1)):
    ele_w_p[i] = x1[i] * x2[i]
toc = time.process_time()
print(f'element-wise product:\n {ele_w_p} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

# general dot product: matrix-vector product W @ x1, one row of W at a time
W = np.random.randn(3, len(x1))
g_d_p = np.zeros((W.shape[0], 1))
tic = time.process_time()
for i in range(W.shape[0]):
    for j in range(len(x1)):
        g_d_p[i] += W[i][j] * x1[j]
toc = time.process_time()
print(f'general dot product:\n {g_d_p} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

x1: [5, 7, 12, 11, 6, 3, 18, 19, 8, 16]
x2: [9, 1, 6, 8, 7, 14, 5, 19, 16, 3]
---------------------------------
dot product: 923 and the computation time is: 0.14599999999997948 ms
---------------------------------
outer product:
 [[ 45.   5.  30.  40.  35.  70.  25.  95.  80.  15.]
 [ 63.   7.  42.  56.  49.  98.  35. 133. 112.  21.]
 [108.  12.  72.  96.  84. 168.  60. 228. 192.  36.]
 [ 99.  11.  66.  88.  77. 154.  55. 209. 176.  33.]
 [ 54.   6.  36.  48.  42.  84.  30. 114.  96.  18.]
 [ 27.   3.  18.  24.  21.  42.  15.  57.  48.   9.]
 [162.  18. 108. 144. 126. 252.  90. 342. 288.  54.]
 [171.  19. 114. 152. 133. 266.  95. 361. 304.  57.]
 [ 72.   8.  48.  64.  56. 112.  40. 152. 128.  24.]
 [144.  16.  96. 128. 112. 224.  80. 304. 256.  48.]] and the computation time is: 0.29699999999976967 ms
---------------------------------
element-wise product:
 [ 45.   7.  72.  88.  42.  42.  90. 361. 128.  48.] and the computation time is: 0.10499999999957765 ms
-------------------------

In [104]:
# vectorized version 
np.random.seed(1)

# Fixed inputs so the results are reproducible.
# NOTE: these must be assignments (`=`). Written as `x1: [...]` they are bare
# annotations that never bind the name, so a fresh kernel fails with NameError.
x1 = [5, 7, 12, 11, 6, 3, 18, 19, 8, 16]
x2 = [9, 1, 6, 8, 7, 14, 5, 19, 16, 3]

# The inputs can alternatively be drawn at random:
#x1 = random.sample(range(1, 20), 10)
#x2 = random.sample(range(1, 20), 10)

print(f'x1: {x1}')
print(f'x2: {x2}')
print('---------------------------------')

# dot product
tic = time.process_time()
dot = np.dot(x1, x2)
toc = time.process_time()
print(f'dot product: {dot} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

# outer product
tic = time.process_time()
outer = np.outer(x1, x2)
toc = time.process_time()
print(f'outer product:\n {outer} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

# element-wise product
tic = time.process_time()
ele_w_p = np.multiply(x1, x2)
toc = time.process_time()
print(f'element-wise product:\n {ele_w_p} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

# general dot product: matrix-vector product of W (3 x len(x1)) with x1
W = np.random.randn(3, len(x1))
tic = time.process_time()
g_d_p = np.dot(W,x1)
toc = time.process_time()
print(f'general dot product:\n {g_d_p} and the computation time is: {1000*(toc-tic)} ms')
print('---------------------------------')

x1: [5, 7, 12, 11, 6, 3, 18, 19, 8, 16]
x2: [9, 1, 6, 8, 7, 14, 5, 19, 16, 3]
---------------------------------
dot product: 923 and the computation time is: 0.17999999999940286 ms
---------------------------------
outer product:
 [[ 45   5  30  40  35  70  25  95  80  15]
 [ 63   7  42  56  49  98  35 133 112  21]
 [108  12  72  96  84 168  60 228 192  36]
 [ 99  11  66  88  77 154  55 209 176  33]
 [ 54   6  36  48  42  84  30 114  96  18]
 [ 27   3  18  24  21  42  15  57  48   9]
 [162  18 108 144 126 252  90 342 288  54]
 [171  19 114 152 133 266  95 361 304  57]
 [ 72   8  48  64  56 112  40 152 128  24]
 [144  16  96 128 112 224  80 304 256  48]] and the computation time is: 0.16899999999964166 ms
---------------------------------
element-wise product:
 [ 45   7  72  88  42  42  90 361 128  48] and the computation time is: 0.15799999999988046 ms
---------------------------------
general dot product:
 [ -0.50738686 -21.82137106   8.5613881 ] and the computation time is: 0.0879999

<b>L1 and L2 Loss</b>

In [106]:
def l1_loss(y, yhat):
    '''
    L1 loss (distance): the sum of absolute differences between y and yhat.
    
    Args:
    y -- ground-truth array
    yhat -- predicted array
    
    return:
    a scalar, the summed absolute error over all elements
    '''
    residual = y - yhat
    return np.abs(residual).sum()

In [129]:
def l2_loss(y, yhat):
    '''
    L2 loss (distance): the sum of squared differences between y and yhat.
    
    Args:
    y -- ground-truth array
    yhat -- predicted array
    
    return:
    a scalar, the summed squared error over all elements
    '''
    squared_error = np.square(y - yhat)
    return squared_error.sum()

In [131]:
# test the loss
# y plays the role of the ground truth and yhat the prediction, matching the
# argument order of l1_loss / l2_loss (the printed labels were swapped before).
y = np.random.randn(1,10)
yhat = np.random.randn(1,10)

print(f'ground-truth array:\n {y}')
print(f'predicted array:\n {yhat}')

l1 = l1_loss(y, yhat)
l2 = l2_loss(y, yhat)
print(f'the l1 loss: {l1} and the l2 loss: {l2}')

predicted array:
 [[ 1.16033857  0.36949272  1.90465871  1.1110567   0.6590498  -1.62743834
   0.60231928  0.4202822   0.81095167  1.04444209]]
ground-truth array:
 [[-0.40087819  0.82400562 -0.56230543  1.95487808 -1.33195167 -1.76068856
  -1.65072127 -0.89055558 -1.1191154   1.9560789 ]]
the l1 loss: 13.85634907635547 and the l2 loss: 24.774496760768955
