In [1]:
import time
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
from dnn_app_utils_v3 import *
from public_tests import *

In [2]:
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()

In [3]:
# function to create any image sample of shape (64, 64) * 3 to its eqouivalent flattened form

def create_flattened_data(data):

    X = data.reshape((data[1].shape[0]*data[1].shape[0]*3, len(data)))
    flattened_data = X/255.
    return flattened_data

In [4]:
X = create_flattened_data(train_x_orig)
# Y = create_flattened_data(train_y)

In [5]:
def initialize_deep_weights(X, layers_dims):

    L = len(layers_dims)
    parameters = {}

    parameters['W1'] = np.random.randn(layers_dims[0], X.shape[0])
    parameters['b1'] = np.zeros((layers_dims[0], 1))
    
    for l in range(1, L):

        parameters['W' + str(l+1)] = np.random.randn(layers_dims[l], layers_dims[l-1]) * 0.01
        parameters['b' + str(l+1)] = np.zeros((layers_dims[l], 1))
    
    return parameters

In [6]:
def linear_forward(A, W, b):

    Z = np.dot(W, A) + b
    
    cache = (A, W, b) # Here A is actually A_prev which is activation from previous layer
    return Z, cache    

In [7]:
def linear_activation_forward(A_prev, W, b, activation):
    # A_prev = X
    if activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    elif activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

In [8]:

def L_model_forward(X, parameters):

    caches = []
    L = len(parameters) // 2
    A_prev = X
    for l in range(1, L):
        W = parameters['W' + str(l)]
        b = parameters['b' + str(l)]

        A_prev, cache = linear_activation_forward(A_prev, W, b, activation="relu")
        caches.append(cache)

    W = parameters['W' + str(L)]
    b = parameters['b' + str(L)]
    AL, activation_cache = linear_activation_forward(A_prev, W, b, activation="sigmoid")

    caches.append(activation_cache)
    return AL, caches

In [9]:
parameters = initialize_deep_weights(X, [5, 4, 1])

AL, caches = L_model_forward(X, parameters)


In [10]:
AL.shape

(1, 209)

In [11]:
A_prev = caches[2][0][0].shape
W = caches[2][0][1].shape

In [12]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -(1/m)*np.sum(Y*np.log(AL)+(1-Y)*np.log(1-AL))

    return cost

# Formulas for backprop

dW = (1/m)*np.dot(dZ, A_prev.T)
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

In [13]:
def linear_backward(A_prev, W, dZ):

    m = X.shape[1]

    dW = (1/m)*np.dot(dZ, A_prev.T)
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

In [14]:
def linear_activation_backward(A_prev, A, W, dZ, activation):

    if activation == 'relu':
        dZ = relu_backward(dZ, A)
        dA_prev, dW, db = linear_backward(A_prev, W, dZ)
        
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dZ, A)
        dA_prev, dW, db = linear_backward(A_prev, W, dZ)
        
        # print("worked fine")
    return dA_prev, dW, db


In [15]:
caches[2][0][1].shape # W
caches[2][0][2].shape # b

(1, 1)

In [16]:
caches[2][0][0].shape # A_prev
caches[2][1].shape # A

(1, 209)

In [17]:
for l in reversed(range(3)):
    A_prev = caches[l][0][0]
    W = caches[l][0][1]
    A = caches[l][1]
    print(A_prev.shape)
    print(W.shape)
    print(A.shape)

(4, 209)
(1, 4)
(1, 209)
(5, 209)
(4, 5)
(4, 209)
(12288, 209)
(5, 12288)
(5, 209)


In [18]:
def L_model_backward(AL, caches, Y):

    L = len(caches)
    gradients = []
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    A_prev = caches[L-1][0][0]
    W = caches[L-1][0][1]
    A = caches[L-1][1]
    dZL, dW, db = linear_activation_backward(A_prev, A, W, dAL, activation='sigmoid')

    gradients.append((dZL, dW, db))
    for l in reversed(range(L-1)):
        A_prev = caches[l][0][0]
        W = caches[l][0][1]
        A = caches[l][1]
        
        dZL, dW, db = linear_activation_backward(A_prev, A, W, dZL, activation='relu')
        gradients.append((dZL, dW, db))
    # cost = compute_cost(AL, Y)
    return gradients
    


In [19]:
def update_parameters(parameters, gradients, learning_rate):

    for l in reversed(range(len(parameters)//2, 1)):
        W =  parameters['W'+ str(l)] 
        B =  parameters['b'+ str(l)]
        
        dW = gradients[l-1][1]
        db = gradients[l-1][2]

        parameters['W'+ str(l)] = W - learning_rate*dW
        parameters['b'+ str(l)] = B - learning_rate*db

    return parameters

In [20]:
gradients = L_model_backward(AL, caches, train_y)
updated_parameters = update_parameters(parameters, gradients=gradients, learning_rate=0.3)

In [21]:
updated_parameters['W1']

array([[-1.2114455 , -0.61315234,  1.63438421, ...,  2.65595536,
         1.17879201,  0.24777997],
       [ 0.34917504, -1.81839231, -0.38102678, ...,  0.39646309,
         0.23065634, -0.84582035],
       [-0.81511899,  0.1087941 , -0.3898773 , ..., -0.84246128,
         0.90791175,  0.36916795],
       [ 0.50427733,  1.76039912, -0.7538749 , ...,  0.63191632,
         0.37415278,  0.15387383],
       [ 0.14715899, -0.96619983, -0.241664  , ...,  0.45719513,
         0.4683928 , -0.68200383]])

worked fine


In [115]:
gradients[0][2].shape

(4, 1)

In [119]:
for l in reversed(range(len(parameters)//2)):
    print(l)

2
1
0


In [157]:
W.shape

(1, 2)

In [158]:
b.shape

(1, 1)

In [159]:
dZ.shape

(2, 209)

In [160]:
dW.shape

(1, 2)