## Packages

In [101]:
import numpy as np
import matplotlib.pyplot as plt

import time
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage

plt.rcParams['figure.figsize'] = (5.0, 4.0) 
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
np.random.seed(1)

## Helper Functions

In [103]:

def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    cache = Z
    return A, cache

def relu(Z):
    A = np.maximum(0,Z)
    assert(A.shape == Z.shape)
    cache = Z 
    return A, cache


def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    assert (dZ.shape == Z.shape)
    return dZ

def sigmoid_backward(dA, cache):
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    assert (dZ.shape == Z.shape)
    return dZ


def load_data():
    train_dataset = h5py.File('W4A2/datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    test_dataset = h5py.File('W4A2/datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

## Outlining
- Initialize the parameters for a two-layer network and for an $L$-layer neural network
- Implement the forward propagation module 
- Compute the loss
- Implement the backward propagation module 
- Finally, update the parameters

## Initialization

In [139]:
def initialize_parameters_deep(layers_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims)
    for i in range(1,L):
        parameters['W'+str(i)] = np.random.randn(layers_dims[i],layers_dims[i-1]) * 0.01
        parameters['b'+str(i)] = np.zeros((layers_dims[i],1))

    return parameters

In [7]:
parameters = initialize_parameters_deep([5,4,3])
print(parameters)

{'W1': array([[ 0.01788628,  0.0043651 ,  0.00096497, -0.01863493, -0.00277388],
       [-0.00354759, -0.00082741, -0.00627001, -0.00043818, -0.00477218],
       [-0.01313865,  0.00884622,  0.00881318,  0.01709573,  0.00050034],
       [-0.00404677, -0.0054536 , -0.01546477,  0.00982367, -0.01101068]]), 'b1': array([[0.],
       [0.],
       [0.],
       [0.]]), 'W2': array([[-0.01185047, -0.0020565 ,  0.01486148,  0.00236716],
       [-0.01023785, -0.00712993,  0.00625245, -0.00160513],
       [-0.00768836, -0.00230031,  0.00745056,  0.01976111]]), 'b2': array([[0.],
       [0.],
       [0.]])}


## Forward Propagation

In [9]:
def linear_forward(A,W,B):
    Z = np.dot(W,A) + B
    cache = (A,W,B)
    return Z, cache

In [19]:
A = np.random.randn(3,2)
B = np.random.randn(1,1)
W = np.random.randn(1,3)
Z, cache = linear_forward(A,W,B)
print(Z)

[[-0.27865558  1.1166525 ]]


In [20]:
def linear_activation_forward(A_prev,W,b,activation):
    if activation == 'sigmoid':
        Z, linear_cache = linear_forward(A_prev,W,b)
        A, activation_cache = sigmoid(Z)

    elif activation == 'relu':
        Z, linear_cache = linear_forward(A_prev,W,b)
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

In [33]:
A_prev = np.random.randn(3,2)
B = np.random.randn(1,1)
W = np.random.randn(1,3)
Z, cache = linear_activation_forward(A_prev,W,B,'sigmoid')
print(Z)
Z, cache = linear_activation_forward(A_prev,W,B,'relu')
print(Z)

[[0.82671298 0.88354675]]
[[1.56250827 2.02645434]]


In [35]:
def l_model_forward(X,parameters):
    caches = []
    A = X
    L = len(parameters) // 2
    for i in range(1,L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev,parameters['W'+str(i)],parameters['b'+str(i)],'relu')
        caches.append(cache)
    AL,cache = linear_activation_forward(A,parameters['W'+str(L)],parameters['b'+str(L)],'sigmoid')
    caches.append(cache)
    return AL,caches

In [65]:
np.random.seed(7)
X = np.random.randn(5,4)
W1 = np.random.randn(4,5)
b1 = np.random.randn(4,1)
W2 = np.random.randn(3,4)
b2 = np.random.randn(3,1)
W3 = np.random.randn(1,3)
b3 = np.random.randn(1,1)
parameters = {"W1": W1,"b1": b1,"W2": W2,"b2": b2,"W3": W3,"b3": b3}
AL,caches = l_model_forward(X,parameters)
print(AL)

[[0.30891685 0.42932176 0.28346704 0.26540328]]


## Cost Function :
$$-\frac{1}{m} \sum\limits_{i = 1}^{m} (y^{(i)}\log\left(a^{[L] (i)}\right) + (1-y^{(i)})\log\left(1- a^{[L](i)}\right)) $$

In [69]:
def compute_cost(AL,Y):
    m = Y.shape[0]
    cost = (-1 / m) * (np.dot(Y , np.log(AL).T) + np.dot((1-Y),np.log(1-AL).T))
    cost = np.squeeze(cost)
    return cost

In [70]:
Y = np.asarray([[1, 1, 0]])
AL = np.array([[.8,.9,0.4]])
cost = compute_cost(AL,Y)
print(cost)

0.8393296907380268


## Backward Propagation

$$ dW^{[l]} = \frac{\partial \mathcal{J} }{\partial W^{[l]}} = \frac{1}{m} dZ^{[l]} A^{[l-1] T}$$
$$ db^{[l]} = \frac{\partial \mathcal{J} }{\partial b^{[l]}} = \frac{1}{m} \sum_{i = 1}^{m} dZ^{[l](i)}$$
$$ dA^{[l-1]} = \frac{\partial \mathcal{L} }{\partial A^{[l-1]}} = W^{[l] T} dZ^{[l]}$$

In [73]:
def linear_backward(dZ,cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = (1/m) * np.dot(dZ,A_prev.T)
    db = (1/m) * np.sum(dZ,axis=1,keepdims=True)
    dA_prev = np.dot(W.T,dZ)
    return dA_prev,dW,db

In [75]:
np.random.seed(1)
dZ = np.random.randn(3,4)
A = np.random.randn(5,4)
W = np.random.randn(3,5)
b = np.random.randn(3,1)
linear_cache = (A, W, b)

dA_prev,dW,db = linear_backward(dZ,linear_cache)
print("dA_prev: " + str(dA_prev))
print("dW: " + str(dW))
print("db: " + str(db))

dA_prev: [[-1.15171336  0.06718465 -0.3204696   2.09812712]
 [ 0.60345879 -3.72508701  5.81700741 -3.84326836]
 [-0.4319552  -1.30987417  1.72354705  0.05070578]
 [-0.38981415  0.60811244 -1.25938424  1.47191593]
 [-2.52214926  2.67882552 -0.67947465  1.48119548]]
dW: [[ 0.07313866 -0.0976715  -0.87585828  0.73763362  0.00785716]
 [ 0.85508818  0.37530413 -0.59912655  0.71278189 -0.58931808]
 [ 0.97913304 -0.24376494 -0.08839671  0.55151192 -0.10290907]]
db: [[-0.14713786]
 [-0.11313155]
 [-0.13209101]]


$$dZ^{[l]} = dA^{[l]} * g'(Z^{[l]}). $$

In [79]:
def linear_activation_backward(dA,cache,activation):
    linear_cache, activation_cache = cache

    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA,activation_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA,activation_cache)

    dA_prev,dW,db = linear_backward(dZ,linear_cache)
    return dA_prev,dW,db

In [90]:
np.random.seed(2)
dAL = np.random.randn(1,2)
A = np.random.randn(3,2)
W = np.random.randn(1,3)
b = np.random.randn(1,1)
Z = np.random.randn(1,2)
linear_cache = (A, W, b)
activation_cache = Z
linear_activation_cache = (linear_cache, activation_cache)
dA_prev, dW, db = linear_activation_backward(dAL, linear_activation_cache, activation = "sigmoid")
print("With sigmoid: dA_prev = \n" + str(dA_prev))
print("With sigmoid: dW = \n" + str(dW))
print("With sigmoid: db = \n" + str(db))

dA_prev, dW, db = linear_activation_backward(dAL, linear_activation_cache, activation = "relu")
print("With relu: dA_prev = \n" + str(dA_prev))
print("With relu: dW = \n" + str(dW))
print("With relu: db = \n" + str(db))


With sigmoid: dA_prev = 
[[ 0.11017994  0.01105339]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576154]]
With sigmoid: dW = 
[[ 0.10266786  0.09778551 -0.01968084]]
With sigmoid: db = 
[[-0.05729622]]
With relu: dA_prev = 
[[ 0.44090989  0.        ]
 [ 0.37883606  0.        ]
 [-0.2298228   0.        ]]
With relu: dW = 
[[ 0.44513824  0.37371418 -0.10478989]]
With relu: db = 
[[-0.20837892]]


In [94]:
def l_model_backward(AL,Y,caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation = "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads

In [96]:

np.random.seed(3)
AL = np.random.randn(1, 2)
Y = np.array([[1, 0]])

A1 = np.random.randn(4,2)
W1 = np.random.randn(3,4)
b1 = np.random.randn(3,1)
Z1 = np.random.randn(3,2)
linear_cache_activation_1 = ((A1, W1, b1), Z1)

A2 = np.random.randn(3,2)
W2 = np.random.randn(1,3)
b2 = np.random.randn(1,1)
Z2 = np.random.randn(1,2)
linear_cache_activation_2 = ((A2, W2, b2), Z2)

caches = (linear_cache_activation_1, linear_cache_activation_2)
grads = l_model_backward(AL, Y, caches)

print("dA0 = \n" + str(grads['dA0']))
print("dA1 = \n" + str(grads['dA1']))
print("dW1 = \n" + str(grads['dW1']))
print("dW2 = \n" + str(grads['dW2']))
print("db1 = \n" + str(grads['db1']))
print("db2 = \n" + str(grads['db2']))

dA0 = 
[[ 0.          0.52257901]
 [ 0.         -0.3269206 ]
 [ 0.         -0.32070404]
 [ 0.         -0.74079187]]
dA1 = 
[[ 0.12913162 -0.44014127]
 [-0.14175655  0.48317296]
 [ 0.01663708 -0.05670698]]
dW1 = 
[[0.41010002 0.07807203 0.13798444 0.10502167]
 [0.         0.         0.         0.        ]
 [0.05283652 0.01005865 0.01777766 0.0135308 ]]
dW2 = 
[[-0.39202432 -0.13325855 -0.04601089]]
db1 = 
[[-0.22007063]
 [ 0.        ]
 [-0.02835349]]
db2 = 
[[0.15187861]]


In [99]:
def update_parameters(params, grads, learning_rate):

    parameters = params
    L = len(parameters) // 2 
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]

    return parameters

In [100]:
np.random.seed(2)
W1 = np.random.randn(3,4)
b1 = np.random.randn(3,1)
W2 = np.random.randn(1,3)
b2 = np.random.randn(1,1)
parameters = {"W1": W1,"b1": b1,"W2": W2,"b2": b2}

np.random.seed(3)
dW1 = np.random.randn(3,4)
db1 = np.random.randn(3,1)
dW2 = np.random.randn(1,3)
db2 = np.random.randn(1,1)
grads = {"dW1": dW1,"db1": db1,"dW2": dW2,"db2": db2}
parameters = update_parameters(parameters, grads, 0.1)

print ("W1 = "+ str(parameters["W1"]))
print ("b1 = "+ str(parameters["b1"]))
print ("W2 = "+ str(parameters["W2"]))
print ("b2 = "+ str(parameters["b2"]))

W1 = [[-0.59562069 -0.09991781 -2.14584584  1.82662008]
 [-1.76569676 -0.80627147  0.51115557 -1.18258802]
 [-1.0535704  -0.86128581  0.68284052  2.20374577]]
b1 = [[-0.04659241]
 [-1.28888275]
 [ 0.53405496]]
W2 = [[-0.55569196  0.0354055   1.32964895]]
b2 = [[-0.84610769]]
