# This file contains all the helper functions required to train the Deep Neural Network

In [2]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import h5py

%matplotlib inline
plt.rcParams['figure.figsize'] = (6.0, 5.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

np.random.seed(1)

Matplotlib is building the font cache; this may take a moment.


In [3]:
# Initialize the weights (W) and biases (b) of every layer of a deep NN
def initialize_parameters(layer_dims):
    """Create the initial weights and biases for every layer of the network.

    The global NumPy generator is re-seeded with 3 on each call, so calling
    this twice with the same `layer_dims` yields identical weights.

    Args:
        layer_dims: sequence of layer sizes; entry 0 is the input size and
            entry l is the number of units in layer l.

    Returns:
        dict with one 'W<l>' / 'b<l>' pair for l = 1 .. len(layer_dims) - 1.
        'W<l>' has shape (layer_dims[l], layer_dims[l - 1]) and holds
        standard-normal draws scaled by 0.01; 'b<l>' is a zero column of
        shape (layer_dims[l], 1).
    """
    np.random.seed(3)
    params = {}

    for layer in range(1, len(layer_dims)):
        units, inputs = layer_dims[layer], layer_dims[layer - 1]

        weights = np.random.randn(units, inputs) * 0.01
        biases = np.zeros((units, 1))

        # Verify the dimensions of this layer's W and b before storing them.
        assert weights.shape == (units, inputs)
        assert biases.shape == (units, 1)

        params[f"W{layer}"] = weights
        params[f"b{layer}"] = biases

    return params

In [4]:
# Verify the output of the function
parameters = initialize_parameters([8,7,2,9,1])

# Four layers follow the input layer; report W then b for each of them.
for layer_index in range(1, 5):
    for prefix in ("W", "b"):
        key = prefix + str(layer_index)
        print(key + " = " , parameters[key].shape)

W1 =  (7, 8)
b1 =  (7, 1)
W2 =  (2, 7)
b2 =  (2, 1)
W3 =  (9, 2)
b3 =  (9, 1)
W4 =  (1, 9)
b4 =  (1, 1)


Output should be:
W1 =  (7, 8)
b1 =  (7, 1)
W2 =  (2, 7)
b2 =  (2, 1)
W3 =  (9, 2)
b3 =  (9, 1)
W4 =  (1, 9)
b4 =  (1, 1)


In [5]:
# Activation functions
# Linear Activation Function
def linear_activation(A, W, b):
    """Compute the affine (pre-activation) part of one layer: Z = W . A + b.

    Args:
        A: input activations; its column count becomes the column count of Z.
        W: weight matrix, multiplied on the left of A.
        b: bias, added to the product using NumPy broadcasting.

    Returns:
        (Z, cache): Z is the pre-activation value and cache is the tuple
        (A, W, b) of the untouched inputs.
    """
    weighted_sum = np.dot(W, A)
    Z = weighted_sum + b

    # One row per row of W, one column per column of A.
    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape

    return Z, (A, W, b)

# Sigmoid Activation Function
def sigmoid(Z):
    """Apply the logistic sigmoid element-wise: A = 1 / (1 + exp(-Z)).

    The textbook formula evaluates exp(-Z), which overflows for large
    negative Z and makes NumPy emit a RuntimeWarning. This version only ever
    exponentiates a non-positive number, so the intermediate stays in [0, 1]:

        Z >= 0:  1 / (1 + exp(-Z))
        Z <  0:  exp(Z) / (1 + exp(Z))

    Both branches are algebraically the same function.

    Args:
        Z: scalar or array of pre-activation values.

    Returns:
        (A, cache): A holds the sigmoid of each entry of Z (same shape as Z);
        cache is the original Z object.
    """
    z = np.asarray(Z)
    decay = np.exp(-np.abs(z))  # exp of a value <= 0, cannot overflow
    A = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    if z.ndim == 0:
        # np.where returns a 0-d array for scalar input; hand back a scalar
        # as the plain formula would.
        A = A[()]
    cache = Z
    return A, cache

# Relu Activation Function
def relu(Z):
    """Apply the rectified linear unit element-wise: max(0, Z).

    Args:
        Z: pre-activation values.

    Returns:
        (A, cache): A is Z with every negative entry replaced by 0; cache is
        the original Z object.
    """
    rectified = np.maximum(0, Z)
    return rectified, Z

For given A, W and b:
A = [[ 1.62434536, -0.61175641]
 [-0.52817175, -1.07296862]
 [ 0.86540763, -2.3015387 ]]

W = [[ 1.74481176, -0.7612069,   0.3190391 ]]
b = [[-0.24937038]]

Z should be:
Z = [[ 3.26295337, -1.23429987]]

In [6]:
# Verify linear_activation function output
# Inputs for the check: A is (3, 2), W is (1, 3) and b is (1, 1).
A = np.array([
    [ 1.62434536, -0.61175641],
    [-0.52817175, -1.07296862],
    [ 0.86540763, -2.3015387 ],
])
W = np.array([[ 1.74481176, -0.7612069,   0.3190391 ]])
b = np.array([[-0.24937038]])

# Linear step first, then feed the same Z through both activations.
# `cache` is overwritten by each call, so only the ReLU one survives.
Z, cache = linear_activation(A, W, b)
A_sig, cache = sigmoid(Z)
A_relu, cache = relu(Z)

for label, value in (('Linear: ', Z), ('Sigmoid: ', A_sig), ('Relu: ', A_relu)):
    print(label, value)

Linear:  [[ 3.26295336 -1.23429988]]
Sigmoid:  [[0.96313579 0.22542973]]
Relu:  [[3.26295336 0.        ]]


In [7]:
# Activation Function forward
def activation_forward(A_prev, W, b, activation):
    """Run one layer forward: the linear step followed by an activation.

    Args:
        A_prev: activations coming from the previous layer (or the input data).
        W: weight matrix of this layer.
        b: bias of this layer.
        activation: name of the activation to apply, 'sigmoid' or 'relu'.

    Returns:
        (A, cache): A is the post-activation output; cache is the pair
        (linear_cache, activation_cache) returned by the linear step and by
        the activation function respectively.

    Raises:
        ValueError: if `activation` is not one of the supported names.
    """
    # Resolve the activation first so an unsupported name fails with a clear
    # message instead of leaving A undefined further down.
    if activation == 'sigmoid':
        activate = sigmoid
    elif activation == 'relu':
        activate = relu
    else:
        raise ValueError(
            "Unsupported activation %r; expected 'sigmoid' or 'relu'" % (activation,)
        )

    Z, linear_cache = linear_activation(A_prev, W, b)
    A, activation_cache = activate(Z)

    # Output verification: one row per row of W, one column per column of A_prev.
    assert(A.shape == (W.shape[0], A_prev.shape[1]))

    cache = (linear_cache, activation_cache)

    return A, cache
        


In [8]:
# Inputs for the check: A_prev is (3, 2), W is (1, 3) and b is (1, 1).
A_prev = np.array([
    [-0.41675785, -0.05626683],
    [-2.1361961,   1.64027081],
    [-1.79343559, -0.84174737],
])
W = np.array([[ 0.50288142, -1.24528809, -1.05795222]])
b = np.array([[-0.90900761]])

# Same inputs through both activations. The second value returned by
# activation_forward is the (linear_cache, activation_cache) pair, whatever
# the name of the variable that receives it.
A, linear_cache = activation_forward(A_prev, W, b, "sigmoid")
print("With sigmoid: A = " + str(A))

A, activation_cache = activation_forward(A_prev, W, b, "relu")
print("With ReLU: A = " + str(A))

With sigmoid: A = [[0.96890023 0.11013289]]
With ReLU: A = [[3.43896134 0.        ]]


In [11]:
# Forward pass of the full model: runs the input through all L layers, using ReLU for layers 1..L-1 and sigmoid for the final layer
def linear_model(X, params):
    """Forward pass through the whole network.

    Layers 1 .. L-1 use ReLU and layer L uses sigmoid, where L is half the
    number of entries in `params` (one 'W<l>' and one 'b<l>' per layer).
    The layer count is printed on every call.

    Args:
        X: input data; its column count must match the columns of the output.
        params: dict holding 'W1', 'b1', ..., 'W<L>', 'b<L>'.

    Returns:
        (AL, caches): AL is the output of the last layer, asserted to have
        shape (1, X.shape[1]); caches is a list with the cache returned by
        activation_forward for each layer, in layer order.
    """
    num_layers = len(params) // 2
    print("Length of the Layers : ", num_layers)

    caches = []
    activations = X

    # Hidden layers: feed each layer's output into the next one.
    for layer in range(1, num_layers):
        activations, layer_cache = activation_forward(
            activations,
            params['W' + str(layer)],
            params['b' + str(layer)],
            'relu',
        )
        caches.append(layer_cache)

    # Output layer.
    AL, output_cache = activation_forward(
        activations,
        params['W' + str(num_layers)],
        params['b' + str(num_layers)],
        'sigmoid',
    )
    caches.append(output_cache)

    # Verify the output shape
    assert AL.shape == (1, X.shape[1])
    return AL, caches

In [12]:
# Verify the output
# Input batch: 5 features x 4 examples.
X = np.array([
    [-0.31178367,  0.72900392, 0.21782079, -0.8990918 ],
    [-2.48678065,  0.91325152,  1.12706373, -1.51409323],
    [ 1.63929108, -0.4298936,   2.63128056,  0.60182225],
    [-0.33588161,  1.23773784,  0.11112817,  0.12915125],
    [ 0.07612761, -0.15512816,  0.63422534,  0.810655  ],
])

# Three-layer network: 5 -> 4 -> 3 -> 1 units.
params = {
    'W1': np.array([
        [ 0.35480861,  1.81259031, -1.3564758 , -0.46363197,  0.82465384],
        [-1.17643148,  1.56448966,  0.71270509, -0.1810066 ,  0.53419953],
        [-0.58661296, -1.48185327,  0.85724762,  0.94309899,  0.11444143],
        [-0.02195668, -2.12714455, -0.83440747, -0.46550831,  0.23371059],
    ]),
    'b1': np.array([[ 1.38503523], [-0.51962709], [-0.78015214], [ 0.95560959]]),
    'W2': np.array([
        [-0.12673638, -1.36861282,  1.21848065, -0.85750144],
        [-0.56147088, -1.0335199 ,  0.35877096,  1.07368134],
        [-0.37550472,  0.39636757, -0.47144628,  2.33660781],
    ]),
    'b2': np.array([[ 1.50278553], [-0.59545972], [ 0.52834106]]),
    'W3': np.array([[ 0.9398248 ,  0.42628539, -0.75815703]]),
    'b3': np.array([[-0.16236698]]),
}

AL, caches = linear_model(X, params)
print("AL = " + str(AL))
print("Length of caches list = " + str(len(caches)))


Length of the Layers :  3
AL = [[0.03921668 0.70498921 0.19734387 0.04728177]]
Length of caches list = 3


In [15]:
# Compute Cost (Cost Function)
def compute_cost(AL, Y):
    """Binary cross-entropy cost, averaged over the m examples (columns of Y).

        cost = -(1/m) * sum( Y * log(AL) + (1 - Y) * log(1 - AL) )

    AL is clipped to [eps, 1 - eps] (eps = machine epsilon of AL's float
    type) before the logarithms are taken. Without this, a prediction that
    saturates to exactly 0 or 1 produces log(0) = -inf, and the product
    0 * -inf turns the whole cost into nan. Predictions strictly inside that
    interval are left untouched, so ordinary results do not change.

    Args:
        AL: predicted probabilities, same shape as Y.
        Y: true labels (0 or 1); Y.shape[1] is the number of examples.

    Returns:
        The cost with all length-1 axes squeezed away.
    """
    m = Y.shape[1]

    AL = np.asarray(AL)
    float_type = AL.dtype if AL.dtype.kind == 'f' else np.float64
    eps = np.finfo(float_type).eps
    AL_safe = np.clip(AL, eps, 1 - eps)  # keep both logs finite

    cost = (-1/m)*(np.dot(Y, np.log(AL_safe).T) + np.dot((1 - Y), np.log(1 - AL_safe).T))
    cost = np.squeeze(cost)
    return cost

For given Y and AL :  Y = np.array([[1, 1, 0]])
AL = np.array([[0.8, 0.9, 0.4]])
Cost should be 0.2797765635793422

In [16]:
#Test the cost function
# Three examples: labels in Y, predicted probabilities in AL.
Y = np.array([[1, 1, 0]])
AL = np.array([[0.8, 0.9, 0.4]])

cost = compute_cost(AL=AL, Y=Y)
print('Cost: ', cost)


Cost:  0.2797765635793422


# Backward Propagation

In [1]:
# Backward Activation functions
# Linear Backward Activation Function
def linear_backward_activation(dZ, cache):
    """Backward step for the linear part of one layer.

    Args:
        dZ: gradient of the cost with respect to this layer's linear output.
        cache: the tuple (A_prev, W, b) used in the forward step.

    Returns:
        (dA_prev, dW, db), each asserted to have the shape of A_prev, W and b
        respectively. dW and db are averaged over the A_prev.shape[1] columns.
    """
    A_prev, W, b = cache
    scale = 1 / A_prev.shape[1]  # 1/m, m = number of columns of A_prev

    dA_prev = np.dot(W.T, dZ)
    dW = scale * np.dot(dZ, A_prev.T)
    db = scale * np.sum(dZ, axis=1, keepdims=True)

    # Every gradient must have the shape of the quantity it belongs to.
    for gradient, reference in ((dA_prev, A_prev), (dW, W), (db, b)):
        assert gradient.shape == reference.shape

    return dA_prev, dW, db

# Sigmoid Backward Activation Function
def sigmoid_backward_activation(dA, cache):
    """Backward step for a sigmoid unit: dZ = dA * s * (1 - s), s = sigmoid(Z).

    s is computed in a piecewise form that only exponentiates non-positive
    numbers, so large negative Z no longer overflows exp(-Z) (and no longer
    triggers a NumPy RuntimeWarning):

        Z >= 0:  1 / (1 + exp(-Z))
        Z <  0:  exp(Z) / (1 + exp(Z))

    Args:
        dA: gradient of the cost with respect to the sigmoid output.
        cache: the Z array given to the sigmoid in the forward step.

    Returns:
        dZ, asserted to have the same shape as Z.
    """
    Z = cache
    decay = np.exp(-np.abs(Z))  # exp of a value <= 0, cannot overflow
    s = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    dZ = dA * s * (1 - s)
    assert(dZ.shape == Z.shape)
    return dZ

# Relu Backward Activation Function
def relu_backward_activation(dA, cache):
    """Backward step for a ReLU unit.

    The gradient passes through unchanged where Z > 0 and is zeroed where
    Z <= 0.

    Args:
        dA: gradient of the cost with respect to the ReLU output.
        cache: the Z array given to the ReLU in the forward step.

    Returns:
        dZ, a new array (dA itself is not modified), asserted to have the
        same shape as Z.
    """
    Z = cache
    dZ = np.array(dA)  # np.array copies by default, so dA stays intact
    inactive = Z <= 0
    dZ[inactive] = 0
    assert dZ.shape == Z.shape
    return dZ