# This file contains all the helper functions required to train the Deep Neural Network

In [6]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import h5py

%matplotlib inline
# Default look for every figure drawn in this notebook: figure size,
# no interpolation between image pixels, and a grayscale colour map.
plt.rcParams.update({
    'figure.figsize': (6.0, 5.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# Seed NumPy's global random generator so the notebook is reproducible.
np.random.seed(1)

In [7]:
# Initialize the weights (W) and biases (b) for every layer of the deep NN
def initialize_parameters(layer_dims):
    """Build the initial weight matrices and bias vectors of the network.

    Parameters
    ----------
    layer_dims : sequence of int
        Number of units in each layer; entry 0 is the size of the input.

    Returns
    -------
    dict
        For every layer l >= 1: 'W<l>' holds an array of shape
        (layer_dims[l], layer_dims[l-1]) with standard-normal samples scaled
        by 0.01, and 'b<l>' holds a zero array of shape (layer_dims[l], 1).

    Notes
    -----
    The global NumPy generator is re-seeded with 3 on every call, so the
    returned weights are the same for identical ``layer_dims``.
    """
    np.random.seed(3)
    params = {}

    # Walk over consecutive (fan-in, fan-out) pairs; layer numbering starts at 1.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for layer, (n_in, n_out) in enumerate(size_pairs, start=1):
        w_key = 'W' + str(layer)
        b_key = 'b' + str(layer)

        params[w_key] = np.random.randn(n_out, n_in) * 0.01
        params[b_key] = np.zeros((n_out, 1))

        # Verify the dimensions of this layer's W and b
        assert params[w_key].shape == (n_out, n_in)
        assert params[b_key].shape == (n_out, 1)

    return params

In [8]:
# # Verify the output of the function
# parameters = initialize_parameters([8,7,2,9,1])
# print("W1 = " , (parameters["W1"]).shape)
# print("b1 = " , (parameters["b1"]).shape)
# print("W2 = " , (parameters["W2"]).shape)
# print("b2 = " , (parameters["b2"]).shape)
# print("W3 = " , (parameters["W3"]).shape)
# print("b3 = " , (parameters["b3"]).shape)
# print("W4 = " , (parameters["W4"]).shape)
# print("b4 = " , (parameters["b4"]).shape)

Output should be:
W1 =  (7, 8)
b1 =  (7, 1)
W2 =  (2, 7)
b2 =  (2, 1)
W3 =  (9, 2)
b3 =  (9, 1)
W4 =  (1, 9)
b4 =  (1, 1)


In [9]:
# Activation functions
# Linear Activation Function
def linear_activation(A, W, b):
    """Compute the linear part of a layer, Z = W . A + b.

    Parameters
    ----------
    A : ndarray
        Activations fed into the layer.
    W : ndarray
        Weight matrix of the layer.
    b : ndarray
        Bias of the layer, added to the matrix product.

    Returns
    -------
    Z : ndarray
        Pre-activation values, of shape (W.shape[0], A.shape[1]).
    cache : tuple
        The inputs ``(A, W, b)``, kept for the backward pass.
    """
    cache = (A, W, b)
    Z = np.dot(W, A) + b

    # One row per unit of this layer, one column per column of A.
    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape

    return Z, cache

# Sigmoid Activation Function
def sigmoid(Z):
    """Apply the logistic function 1 / (1 + exp(-Z)) element-wise.

    Returns
    -------
    tuple
        ``(A, Z)``: the activations and the untouched input, which is
        returned as the cache for the backward pass.
    """
    exp_neg = np.exp(-Z)
    activation = 1 / (1 + exp_neg)
    return activation, Z

# Relu Activation Function
def relu(Z):
    """Apply the rectified linear unit, max(0, Z), element-wise.

    Returns
    -------
    tuple
        ``(A, Z)``: the rectified values and the untouched input, which is
        returned as the cache for the backward pass.
    """
    rectified = np.maximum(0, Z)
    return rectified, Z

For given A, W and b:
A = [[ 1.62434536, -0.61175641]
 [-0.52817175, -1.07296862]
 [ 0.86540763, -2.3015387 ]]

W = [[ 1.74481176, -0.7612069,   0.3190391 ]]
b = [[-0.24937038]]

Z should be:
Z = [[ 3.26295337, -1.23429987]]

In [10]:
# # Verify linear_activation function output
# A = np.array([[ 1.62434536, -0.61175641],[-0.52817175, -1.07296862],[ 0.86540763, -2.3015387 ]])

# W = np.array([[ 1.74481176, -0.7612069,   0.3190391 ]])

# b = np.array([[-0.24937038]])

# Z, cache = linear_activation(A, W, b)
# A_sig, cache = sigmoid(Z)
# A_relu, cache = relu(Z)

# print('Linear: ', Z)
# print('Sigmoid: ', A_sig)
# print('Relu: ', A_relu)

In [11]:
# Activation Function forward
def activation_forward(A_prev, W, b, activation):
    """Run one full layer forward: linear step followed by its activation.

    Parameters
    ----------
    A_prev : ndarray
        Activations coming from the previous layer (or the input data).
    W : ndarray
        Weight matrix of this layer.
    b : ndarray
        Bias of this layer.
    activation : str
        Either ``'sigmoid'`` or ``'relu'``.

    Returns
    -------
    A : ndarray
        Output of the activation, of shape (W.shape[0], A_prev.shape[1]).
    cache : tuple
        ``(linear_cache, activation_cache)`` as produced by
        ``linear_activation`` and the chosen activation function.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Reject unknown names up front; otherwise neither branch would bind A
    # and the failure would surface as a confusing UnboundLocalError.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            "Unsupported activation {!r}; expected 'sigmoid' or 'relu'".format(activation)
        )

    Z, linear_cache = linear_activation(A_prev, W, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = relu(Z)

    # Output verification
    assert(A.shape == (W.shape[0], A_prev.shape[1]))

    cache = (linear_cache, activation_cache)

    return A, cache
        


In [12]:
# A_prev = np.array([[-0.41675785, -0.05626683], [-2.1361961,   1.64027081], [-1.79343559, -0.84174737]])
# W = np.array([[ 0.50288142, -1.24528809, -1.05795222]])
# b = np.array([[-0.90900761]])

# A, linear_cache = activation_forward(A_prev, W, b, activation = "sigmoid")
# print("With sigmoid: A = " + str(A))

# A, activation_cache = activation_forward(A_prev, W, b, activation = "relu")
# print("With ReLU: A = " + str(A))

In [13]:
# Forward pass of the full L-layer model: ReLU for every hidden layer, sigmoid for the output layer
def linear_model(X, params, verbose=False):
    """Forward pass through the whole network.

    Layers 1 .. L-1 use ReLU and the last layer L uses sigmoid, where L is
    half the number of entries in ``params`` (one 'W' and one 'b' per layer).

    Parameters
    ----------
    X : ndarray
        Input data; the result is asserted to have ``X.shape[1]`` columns.
    params : dict
        Holds 'W1', 'b1', ..., 'W<L>', 'b<L>'.
    verbose : bool, optional
        When True, print the number of layers. Off by default so that calling
        this function inside a training loop does not flood the output.

    Returns
    -------
    AL : ndarray
        Output of the final sigmoid layer, of shape (1, X.shape[1]).
    caches : list
        One cache per layer, in layer order, as returned by
        ``activation_forward``.

    Raises
    ------
    ValueError
        If ``params`` does not describe at least one layer.
    """
    L = len(params) // 2
    if L == 0:
        raise ValueError("params must contain at least one layer ('W1' and 'b1')")
    if verbose:
        print("Length of the Layers : ", L)

    caches = []
    A = X

    # Hidden layers: ReLU.
    for l in range(1, L):
        A, cache = activation_forward(A, params['W' + str(l)], params['b' + str(l)], 'relu')
        caches.append(cache)

    # Output layer: sigmoid.
    AL, cache = activation_forward(A, params['W' + str(L)], params['b' + str(L)], 'sigmoid')
    caches.append(cache)

    # Verify the output shape
    assert(AL.shape == (1, X.shape[1]))
    return AL, caches

In [14]:
# # Verify the output
# X = np.array([[-0.31178367,  0.72900392, 0.21782079, -0.8990918 ],
#  [-2.48678065,  0.91325152,  1.12706373, -1.51409323],
#  [ 1.63929108, -0.4298936,   2.63128056,  0.60182225],
#  [-0.33588161,  1.23773784,  0.11112817,  0.12915125],
#  [ 0.07612761, -0.15512816,  0.63422534,  0.810655  ]])
# params = {'W1': np.array([[ 0.35480861,  1.81259031, -1.3564758 , -0.46363197,  0.82465384],
#        [-1.17643148,  1.56448966,  0.71270509, -0.1810066 ,  0.53419953],
#        [-0.58661296, -1.48185327,  0.85724762,  0.94309899,  0.11444143],
#        [-0.02195668, -2.12714455, -0.83440747, -0.46550831,  0.23371059]]), 
#        'b1': np.array([[ 1.38503523], [-0.51962709], [-0.78015214], [ 0.95560959]]), 
#        'W2': np.array([[-0.12673638, -1.36861282,  1.21848065, -0.85750144], [-0.56147088, -1.0335199 ,  0.35877096,  1.07368134], [-0.37550472,  0.39636757, -0.47144628,  2.33660781]]), 
#        'b2': np.array([[ 1.50278553], [-0.59545972], [ 0.52834106]]), 
#        'W3': np.array([[ 0.9398248 ,  0.42628539, -0.75815703]]), 
#        'b3': np.array([[-0.16236698]])}
# AL, caches = linear_model(X, params)
# print("AL = " + str(AL))
# print("Length of caches list = " + str(len(caches)))


In [15]:
# Compute Cost (Cost Function)
def compute_cost(AL, Y):
    """Cross-entropy cost between predictions ``AL`` and labels ``Y``.

    Parameters
    ----------
    AL : ndarray
        Predicted probabilities; used as a row vector with one column per
        example (its transpose is multiplied with ``Y``).
    Y : ndarray
        Labels, with the number of examples in ``Y.shape[1]``.

    Returns
    -------
    ndarray
        The cost after ``np.squeeze`` (a 0-d array for (1, m) inputs).

    Notes
    -----
    Predictions are converted to float64 and clipped to [1e-15, 1 - 1e-15]
    before the logarithms. Without this, a saturated prediction of exactly
    0 or 1 gives log(0) = -inf, and 0 * -inf turns the whole cost into nan.
    Values strictly inside that interval are unaffected.
    """
    m = Y.shape[1]

    eps = 1e-15
    AL_safe = np.clip(np.asarray(AL, dtype=np.float64), eps, 1 - eps)

    cost = (-1/m)*(np.dot(Y, np.log(AL_safe).T) + np.dot((1 - Y), np.log(1 - AL_safe).T))
    cost = np.squeeze(cost)
    return cost

For given Y and AL :  Y = np.array([[1, 1, 0]])
AL = np.array([[0.8, 0.9, 0.4]])
Cost should be 0.2797765635793422

In [16]:
# #Test the cost function
# Y = np.array([[1, 1, 0]])
# AL = np.array([[0.8, 0.9, 0.4]])
# cost = compute_cost(AL, Y)
# print('Cost: ', cost)


# Backward Propagation

In [17]:
# Backward Activation functions
# Linear Backward Activation Function
def linear_backward(dZ, cache):
    """Backward pass through the linear part of one layer.

    Parameters
    ----------
    dZ : ndarray
        Gradient of the cost with respect to this layer's linear output.
    cache : tuple
        ``(A_prev, W, b)`` stored during the forward pass.

    Returns
    -------
    dA_prev : ndarray
        W.T . dZ, same shape as ``A_prev``.
    dW : ndarray
        (1/m) * dZ . A_prev.T, same shape as ``W``.
    db : ndarray
        (1/m) * row sums of dZ (dimensions kept), same shape as ``b``.

    Here m is ``A_prev.shape[1]``.
    """
    A_prev, W, b = cache
    inv_m = 1 / A_prev.shape[1]

    # Parameter gradients are averaged over the m columns.
    dW = inv_m * np.dot(dZ, A_prev.T)
    db = inv_m * np.sum(dZ, axis=1, keepdims=True)
    # Gradient handed back to the previous layer.
    dA_prev = np.dot(W.T, dZ)

    # Every gradient must have the shape of the quantity it belongs to.
    for grad, ref in ((dA_prev, A_prev), (dW, W), (db, b)):
        assert grad.shape == ref.shape

    return dA_prev, dW, db

# Sigmoid Backward Activation Function
def sigmoid_backward(dA, cache):
    """Backward pass through a sigmoid activation.

    Parameters
    ----------
    dA : ndarray
        Gradient of the cost with respect to the activation output.
    cache : ndarray
        The pre-activation values Z stored during the forward pass.

    Returns
    -------
    ndarray
        dZ = dA * s * (1 - s) with s = sigmoid(Z); same shape as Z.
    """
    pre_activation = cache
    sig = 1 / (1 + np.exp(-pre_activation))

    scaled = dA * sig
    dZ = scaled * (1 - sig)

    assert dZ.shape == pre_activation.shape
    return dZ

# Relu Backward Activation Function
def relu_backward(dA, cache):
    """Backward pass through a ReLU activation.

    Parameters
    ----------
    dA : ndarray
        Gradient of the cost with respect to the activation output.
    cache : ndarray
        The pre-activation values Z stored during the forward pass.

    Returns
    -------
    ndarray
        A copy of ``dA`` with entries set to 0 wherever Z <= 0; same shape
        as Z. ``dA`` itself is not modified.
    """
    pre_activation = cache

    # Work on a copy so the caller's gradient array stays intact.
    grad = np.array(dA, copy=True)
    inactive = pre_activation <= 0
    grad[inactive] = 0

    assert grad.shape == pre_activation.shape
    return grad

For the following inputs:
dZ  = [[ 1.62434536, -0.61175641, -0.52817175, -1.07296862],
 [ 0.86540763, -2.3015387,  1.74481176, -0.7612069 ],
 [ 0.3190391,  -0.24937038,  1.46210794, -2.06014071]]

linear_cache = (
        np.array([[-0.3224172 , -0.38405435,  1.13376944, -1.09989127], [-0.17242821, -0.87785842,  0.04221375,  0.58281521], [-1.10061918,  1.14472371,  0.90159072,  0.50249434], [ 0.90085595, -0.68372786, -0.12289023, -0.93576943],
       [-0.26788808,  0.53035547, -0.69166075, -0.39675353]]), 
       np.array([[-0.6871727 , -0.84520564, -0.67124613, -0.0126646 , -1.11731035], [ 0.2344157 ,  1.65980218,  0.74204416, -0.19183555, -0.88762896], [-0.74715829,  1.6924546 ,  0.05080775, -0.63699565,  0.19091548]]), np.array([[2.10025514],
       [0.12015895], [0.61720311]]))

Output of linear_backward should be:

dA_prev = [[-1.15171336  0.06718465 -0.32046959  2.09812711]
 [ 0.6034588  -3.72508703  5.81700741 -3.84326836]
 [-0.4319552  -1.30987418  1.72354703  0.05070578]
 [-0.38981415  0.60811244 -1.25938424  1.47191593]
 [-2.52214925  2.67882551 -0.67947465  1.48119548]]
dW = [[ 0.07313866 -0.0976715  -0.87585828  0.73763362  0.00785716]
 [ 0.85508818  0.37530413 -0.59912656  0.71278189 -0.58931808]
 [ 0.97913304 -0.24376493 -0.08839671  0.55151192 -0.10290907]]
db = [[-0.14713785]
 [-0.11313155]
 [-0.13209101]]




In [18]:
# dZ  = [[ 1.62434536, -0.61175641, -0.52817175, -1.07296862],
#  [ 0.86540763, -2.3015387,  1.74481176, -0.7612069 ],
#  [ 0.3190391,  -0.24937038,  1.46210794, -2.06014071]]

# linear_cache = (
#         np.array([[-0.3224172 , -0.38405435,  1.13376944, -1.09989127], [-0.17242821, -0.87785842,  0.04221375,  0.58281521], [-1.10061918,  1.14472371,  0.90159072,  0.50249434], [ 0.90085595, -0.68372786, -0.12289023, -0.93576943],
#        [-0.26788808,  0.53035547, -0.69166075, -0.39675353]]), 
#        np.array([[-0.6871727 , -0.84520564, -0.67124613, -0.0126646 , -1.11731035], [ 0.2344157 ,  1.65980218,  0.74204416, -0.19183555, -0.88762896], [-0.74715829,  1.6924546 ,  0.05080775, -0.63699565,  0.19091548]]), np.array([[2.10025514],
#        [0.12015895], [0.61720311]]))

# dA_prev, dW, db = linear_backward(dZ, linear_cache)
# print ("dA_prev = "+ str(dA_prev))
# print ("dW = " + str(dW))
# print ("db = " + str(db))


In [19]:
# Get the values of dA_prev, dW and db by back-propagating through the activation and then through the linear step
def activation_backward(dA, cache, activation):
    """Backward pass through one full layer (activation, then linear step).

    Parameters
    ----------
    dA : ndarray
        Gradient of the cost with respect to this layer's activation output.
    cache : tuple
        ``(linear_cache, activation_cache)`` stored during the forward pass.
    activation : str
        Either ``'relu'`` or ``'sigmoid'``; must match the forward pass.

    Returns
    -------
    tuple
        ``(dA_prev, dW, db)`` as produced by ``linear_backward``.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Reject unknown names up front; otherwise neither branch would bind dZ
    # and the failure would surface as a confusing UnboundLocalError.
    if activation not in ('relu', 'sigmoid'):
        raise ValueError(
            "Unsupported activation {!r}; expected 'relu' or 'sigmoid'".format(activation)
        )

    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

For this input of dAL and linear_activation_cache:

dAL=  np.array([[-0.41675785, -0.05626683]])
linear_activation_cache=  ((
    np.array([[-2.1361961 ,  1.64027081], [-1.79343559, -0.84174737], [ 0.50288142, -1.24528809]]), 
    np.array([[-1.05795222, -0.90900761,  0.55145404]]), 
    np.array([[2.29220801]])), 
    np.array([[ 0.04153939, -1.11792545]])
    )

Output should be :
sigmoid:
dA_prev = [[ 0.11017994  0.0110534 ]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576155]]
dW = [[ 0.10266786  0.09778551 -0.01968084]]
db = [[-0.05729622]]

relu:
dA_prev = [[ 0.44090989  0.        ]
 [ 0.37883606  0.        ]
 [-0.2298228   0.        ]]
dW = [[ 0.44513825  0.37371418 -0.10478989]]
db = [[-0.20837892]]

In [20]:
# dAL=  np.array([[-0.41675785, -0.05626683]])
# linear_activation_cache=  ((
#     np.array([[-2.1361961 ,  1.64027081], [-1.79343559, -0.84174737], [ 0.50288142, -1.24528809]]), 
#     np.array([[-1.05795222, -0.90900761,  0.55145404]]), 
#     np.array([[2.29220801]])), 
#     np.array([[ 0.04153939, -1.11792545]])
#     )
# dA_prev, dW, db = activation_backward(dAL, linear_activation_cache, activation = "sigmoid")
# print ("sigmoid:")
# print ("dA_prev = "+ str(dA_prev))
# print ("dW = " + str(dW))
# print ("db = " + str(db) + "\n")

# dA_prev, dW, db = activation_backward(dAL, linear_activation_cache, activation = "relu")
# print ("relu:")
# print ("dA_prev = "+ str(dA_prev))
# print ("dW = " + str(dW))
# print ("db = " + str(db))

In [21]:
# Now let's find the value of dA_prev, dW, db for each and every layer. To do that we need activation_backward_model
def activation_backward_model(AL, Y, caches):
    """Backward pass through the whole network.

    The last layer is treated as sigmoid and all earlier layers as ReLU,
    mirroring the forward pass.

    Parameters
    ----------
    AL : ndarray
        Output of the forward pass.
    Y : ndarray
        Labels; reshaped to ``AL.shape`` before use.
    caches : sequence
        One cache per layer, in layer order, as collected during the forward
        pass.

    Returns
    -------
    dict
        For a network of L = len(caches) layers: 'dW<l>' and 'db<l>' for
        l = 1..L, and 'dA<l>' for l = 0..L-1, where 'dA<l>' is the gradient
        with respect to the activations feeding layer l+1.

    Raises
    ------
    ValueError
        If ``caches`` is empty.
    """
    L = len(caches)  # Caches are collected layer-wise, so their count is the number of layers
    if L == 0:
        raise ValueError("caches must contain at least one layer cache")

    grads = {}
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # The last layer is handled first because it is sigmoid; all others are ReLU.
    current_cache = caches[L-1]  # Last layer
    grads['dA' + str(L-1)], grads['dW' + str(L)], grads['db' + str(L)] = activation_backward(dAL, current_cache, activation = 'sigmoid')

    # Remaining layers, from L-1 down to 1 (cache index l belongs to layer l + 1).
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = activation_backward(grads["dA" + str(l + 1)], current_cache, activation = "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads

In [22]:
# AL = np.array([[1.78862847, 0.43650985]])
# Y = np.array([[1, 0]])
# caches =  (((np.array([[ 0.09649747, -1.8634927 ],
#        [-0.2773882 , -0.35475898],
#        [-0.08274148, -0.62700068],
#        [-0.04381817, -0.47721803]]), np.array([[-1.31386475,  0.88462238,  0.88131804,  1.70957306],
#        [ 0.05003364, -0.40467741, -0.54535995, -1.54647732],
#        [ 0.98236743, -1.10106763, -1.18504653, -0.2056499 ]]), np.array([[ 1.48614836],
#        [ 0.23671627],
#        [-1.02378514]])), np.array([[-0.7129932 ,  0.62524497],
#        [-0.16051336, -0.76883635],
#        [-0.23003072,  0.74505627]])), ((np.array([[ 1.97611078, -1.24412333],
#        [-0.62641691, -0.80376609],
#        [-2.41908317, -0.92379202]]), np.array([[-1.02387576,  1.12397796, -0.13191423]]), np.array([[-1.62328545]])), np.array([[ 0.64667545, -0.35627076]])))
# grads = activation_backward_model(AL, Y, caches)
# print(grads)

In [23]:
# Update parameters with dW and db values to prepare W and b for next iteration.
def update_params(params, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Parameters
    ----------
    params : dict
        Holds 'W<l>' and 'b<l>' for each layer; its entries are replaced
        in place with the updated arrays.
    grads : dict
        Holds the matching 'dW<l>' and 'db<l>' gradients.
    learning_rate : float
        Step size multiplied with each gradient.

    Returns
    -------
    dict
        The same ``params`` object, after the update.
    """
    n_layers = len(params) // 2

    for layer in range(1, n_layers + 1):
        suffix = str(layer)
        # Weights first, then bias: param <- param - learning_rate * gradient
        for kind in ('W', 'b'):
            key = kind + suffix
            params[key] = params[key] - learning_rate * grads['d' + key]

    return params

In [24]:
# params = {'W1': np.array([[-0.41675785, -0.05626683, -2.1361961 ,  1.64027081],
#        [-1.79343559, -0.84174737,  0.50288142, -1.24528809],
#        [-1.05795222, -0.90900761,  0.55145404,  2.29220801]]), 'b1': np.array([[ 0.04153939],
#        [-1.11792545],
#        [ 0.53905832]]), 'W2': np.array([[-0.5961597 , -0.0191305 ,  1.17500122]]), 'b2': np.array([[-0.74787095]])}
# grads = {'dW1': np.array([[ 1.78862847,  0.43650985,  0.09649747, -1.8634927 ],
#        [-0.2773882 , -0.35475898, -0.08274148, -0.62700068],
#        [-0.04381817, -0.47721803, -1.31386475,  0.88462238]]), 'db1': np.array([[0.88131804],
#        [1.70957306],
#        [0.05003364]]), 'dW2': np.array([[-0.40467741, -0.54535995, -1.54647732]]), 'db2': np.array([[0.98236743]])}

# parameters = update_params(params, grads, 0.1)

# print ("W1 = "+ str(parameters["W1"]))
# print ("b1 = "+ str(parameters["b1"]))
# print ("W2 = "+ str(parameters["W2"]))
# print ("b2 = "+ str(parameters["b2"]))

In [25]:
# Load Datasets
def load_datasets(train_path='data/train_catvnoncat.h5', test_path='data/test_catvnoncat.h5'):
    """Load the train and test sets from two HDF5 files.

    Parameters
    ----------
    train_path : str, optional
        HDF5 file containing the 'train_set_x' and 'train_set_y' datasets.
    test_path : str, optional
        HDF5 file containing the 'test_set_x', 'test_set_y' and
        'list_classes' datasets.

    Returns
    -------
    tuple
        ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. Both label arrays are reshaped to a
        single row of shape (1, n) where n is their original first dimension.

    Notes
    -----
    Each file is opened in a ``with`` block so its handle is closed as soon
    as the data has been copied into NumPy arrays.
    """
    with h5py.File(train_path, "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File(test_path, "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Turn the label vectors into row vectors.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [None]:
# train_x_orig, train_y, test_x_orig, test_y, classes = load_datasets()
# print(test_x_orig, train_x_orig,train_y,test_y)

[[[[158 104  83]
   [161 106  85]
   [162 107  84]
   ...
   [170 120 100]
   [167 121 103]
   [172 127 109]]

  [[158 103  82]
   [160 104  82]
   [162 105  83]
   ...
   [169 118  99]
   [164 117  98]
   [168 121 104]]

  [[158 104  82]
   [161 105  82]
   [162 105  83]
   ...
   [173 123 102]
   [169 122 104]
   [168 122 104]]

  ...

  [[102  68  52]
   [ 44  31  23]
   [ 28  23  20]
   ...
   [163 118 102]
   [163 120 103]
   [166 121 105]]

  [[ 99  67  51]
   [ 38  28  22]
   [ 30  26  23]
   ...
   [161 117 100]
   [164 121 104]
   [168 123 106]]

  [[127  95  72]
   [ 39  29  22]
   [ 30  25  22]
   ...
   [165 122 105]
   [169 126 109]
   [173 128 110]]]


 [[[115 110 111]
   [137 129 129]
   [155 146 145]
   ...
   [159 156 157]
   [141 141 145]
   [121 122 127]]

  [[123 118 120]
   [143 136 136]
   [159 153 150]
   ...
   [167 164 165]
   [151 151 154]
   [130 133 137]]

  [[135 130 130]
   [150 145 141]
   [164 159 153]
   ...
   [173 174 172]
   [160 162 162]
   [141 144

# With this, the utility functions are complete