In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from test_utils.testCases import *
from dnn_utils.activations import *

np.random.seed(3)

In [4]:
# Load the scikit-learn digits dataset and wrap its feature matrix and label
# vector in DataFrames.
load_digits = datasets.load_digits()
X = pd.DataFrame(data=load_digits.data)
y = pd.DataFrame(data=load_digits.target)

# Show which fields the loaded dataset object exposes.
print(load_digits.keys())

# Hold out 33% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=44)

# NOTE(review): these frames hold one sample per row (see the printed shapes),
# whereas the network functions below read the example count from axis 1
# (e.g. `m = Y.shape[1]` in compute_cost). Presumably the data must be
# transposed before being fed to them -- confirm against the training cell.
print(f'Train Set:: X_train: {X_train.shape}, y_train: {y_train.shape}')
print(f'Test Set:: X_test: {X_test.shape}, y_test: {y_test.shape}')

dict_keys(['data', 'target', 'target_names', 'images', 'DESCR'])
Train Set:: X_train: (1203, 64), y_train: (1203, 1)
Test Set:: X_test: (594, 64), y_test: (594, 1)


In [38]:
"""

GOAL: 

1. Create an N-layer NN (say N=2), where each layer has a specific number of units. 
For example, for N = 2, the unit counts would be [units_count1, units_count2].

2. Implement Forward Propagation for the NN.

3. Calculate cost.

4. Calculate Gradient by using Backward propagation.

5. Update parameters.

"""

In [5]:
def initialize_parameters(layer_dims):
    """
    Build the initial weights and biases of a fully connected network.

    Arguments:
    layer_dims -- sequence of layer sizes, input layer first; e.g. [5, 4, 3]
                  means 5 inputs, 4 hidden units and 3 output units.

    Returns:
    parameters -- dict holding, for every layer l = 1 .. len(layer_dims) - 1:
                  'W<l>' -- array of shape (layer_dims[l], layer_dims[l-1]),
                            standard-normal draws scaled by 0.01
                  'b<l>' -- zero array of shape (layer_dims[l], 1)
    """
    parameters = {}

    # Pair every layer's input width with its output width. Numbering starts
    # at 1 because layer 0 is the input and owns no parameters.
    width_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_in, n_out) in enumerate(width_pairs, start=1):
        parameters[f'W{idx}'] = np.random.randn(n_out, n_in) * 0.01
        parameters[f'b{idx}'] = np.zeros((n_out, 1))

    return parameters

In [6]:
""" Test Initialize parameters """

# Build a small 5 -> 4 -> 3 network and print each parameter array so the
# shapes and the 0.01-scaled weights can be checked by eye.
# Note: this binds the notebook-level name `parameters`.
parameters = initialize_parameters([5, 4, 3])
print(parameters['W1'])
print(parameters['b1'])
print(parameters['W2'])
print(parameters['b2'])

[[ 0.01788628  0.0043651   0.00096497 -0.01863493 -0.00277388]
 [-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
 [-0.01313865  0.00884622  0.00881318  0.01709573  0.00050034]
 [-0.00404677 -0.0054536  -0.01546477  0.00982367 -0.01101068]]
[[ 0.]
 [ 0.]
 [ 0.]
 [ 0.]]
[[-0.01185047 -0.0020565   0.01486148  0.00236716]
 [-0.01023785 -0.00712993  0.00625245 -0.00160513]
 [-0.00768836 -0.00230031  0.00745056  0.01976111]]
[[ 0.]
 [ 0.]
 [ 0.]]


In [7]:
def linear_forward(A, W, b):
    """
    Linear (pre-activation) step of a single layer: Z = W . A + b.

    Arguments:
    A -- activations feeding this layer
    W -- weight matrix of this layer
    b -- bias of this layer, added to the product

    Returns:
    Z -- result of np.dot(W, A) + b
    cache -- the tuple (A, W, b), in the layout linear_backward unpacks
    """
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)

In [8]:
""" Test linear forward """

# linear_forward_test_case is not defined in this notebook; presumably it is
# supplied by the star import of test_utils.testCases -- verify.
A, W, b = linear_forward_test_case()
Z, cache = linear_forward(A, W, b)
# Print the pre-activation values for manual comparison with the expected output.
print(Z)

[[ 3.26295337 -1.23429987]]


In [9]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Forward pass through one layer: linear step followed by its activation.

    Arguments:
    A_prev -- activations coming from the previous layer (or the input data)
    W -- weight matrix of this layer
    b -- bias of this layer
    activation -- name of the activation to apply: 'sigmoid' or 'relu'

    Returns:
    A -- output of the activation function
    cache -- tuple (linear_cache, activation_cache): linear_cache is what
             linear_forward returned, activation_cache is what the activation
             helper returned alongside A

    Raises:
    ValueError -- if `activation` is neither 'sigmoid' nor 'relu'
    """
    # Reject unknown names up front. Previously a typo silently produced
    # A = None and a None activation cache, which only failed much later.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'")

    Z, linear_cache = linear_forward(A_prev, W, b)

    # `sigmoid` and `relu` are not defined in this notebook (presumably they
    # come from dnn_utils.activations); each returns (A, activation_cache).
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    else:
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

In [10]:
""" Test linear activation forward """

# Run the same fixture through both supported activations and print each
# result for manual comparison.
# linear_activation_forward_test_case is not defined in this notebook;
# presumably it comes from test_utils.testCases -- verify.
A_prev, W, b = linear_activation_forward_test_case()
A, cache = linear_activation_forward(A_prev, W, b, activation='sigmoid')
print(A)

# Same inputs through the ReLU branch; `A` and `cache` are overwritten.
A, cache = linear_activation_forward(A_prev, W, b, activation='relu')
print(A)

[[ 0.96890023  0.11013289]]
[[ 3.43896131  0.        ]]


In [11]:
def L_model_forward(X, parameters):
    """
    Full forward pass: ReLU on every layer except the last, sigmoid on the last.

    Arguments:
    X -- input data fed to the first layer
    parameters -- dict with keys 'W1', 'b1', ..., 'WL', 'bL'

    Returns:
    AL -- output of the final (sigmoid) layer
    caches -- list with one cache per layer, in forward order, as returned by
              linear_activation_forward
    """
    # Each layer contributes one W entry and one b entry, hence the halving.
    num_layers = len(parameters) // 2
    caches = []
    activations = X

    # Layers 1 .. num_layers - 1 all use ReLU.
    for layer in range(1, num_layers):
        weights = parameters[f'W{layer}']
        biases = parameters[f'b{layer}']
        activations, layer_cache = linear_activation_forward(activations, weights, biases, 'relu')
        caches.append(layer_cache)

    # The last layer uses the sigmoid.
    AL, layer_cache = linear_activation_forward(
        activations,
        parameters[f'W{num_layers}'],
        parameters[f'b{num_layers}'],
        'sigmoid',
    )
    caches.append(layer_cache)

    return AL, caches

In [12]:
""" Test L_model_forward """

# Note: this rebinds the notebook-level names `X` and `parameters`, replacing
# the digits feature frame and the parameters built in earlier cells.
# L_model_forward_test_case_2hidden is not defined in this notebook;
# presumably it comes from test_utils.testCases -- verify.
X, parameters = L_model_forward_test_case_2hidden()
AL, caches = L_model_forward(X, parameters)

# Print the network output and the number of per-layer caches collected.
print(AL)
print(len(caches))

[[ 0.03921668  0.70498921  0.19734387  0.04728177]]
3


In [13]:
def compute_cost(AL, Y):
    """
    Mean binary cross-entropy between predictions and labels.

    Arguments:
    AL -- predicted probabilities, same shape as Y
    Y -- true labels (0 or 1); the number of examples is read from Y.shape[1]

    Returns:
    cost -- -1/m * sum(Y * log(AL) + (1 - Y) * log(1 - AL))

    Predictions are clipped to [1e-15, 1 - 1e-15] before the logarithm. An
    exact 0 or 1 would otherwise give log(0) = -inf, and 0 * -inf = nan,
    which poisons the whole cost. Values strictly inside that interval are
    unaffected.
    """
    m = Y.shape[1]  # number of columns, i.e. number of examples

    # Keep the arguments of both logarithms strictly positive.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)

    cost = -1/m * np.sum(np.multiply(Y, np.log(AL_safe)) + np.multiply((1-Y), np.log(1-AL_safe)))
    return cost

In [14]:
""" Test compute_cost """
# The fixture hands back labels first, while compute_cost takes the
# predictions first -- hence the swapped order between these two lines.
# compute_cost_test_case is not defined in this notebook; presumably it comes
# from test_utils.testCases -- verify.
Y, AL = compute_cost_test_case()
cost = compute_cost(AL, Y)
print(cost)

0.414931599615


In [15]:
def linear_backward(dZ, cache):
    """
    Backward pass through the linear part of one layer.

    Arguments:
    dZ -- gradient of the cost with respect to this layer's linear output
    cache -- tuple (A_prev, W, b) saved by linear_forward

    Returns:
    dA_prev -- gradient with respect to the previous layer's activations
    dW -- gradient with respect to W, averaged over the examples
    db -- gradient with respect to b, averaged over the examples and kept
          as a column via keepdims=True
    """
    # The bias value itself is not needed to compute any of the gradients.
    prev_activations, weights, _ = cache

    # Examples are laid out along axis 1.
    inv_m = 1 / prev_activations.shape[1]

    grad_W = inv_m * np.dot(dZ, prev_activations.T)
    grad_b = inv_m * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(weights.T, dZ)

    return grad_A_prev, grad_W, grad_b

In [16]:
""" Test linear_backward """
# linear_backward_test_case is not defined in this notebook; presumably it
# comes from test_utils.testCases -- verify.
dZ, cache = linear_backward_test_case()
dA_prev, dW, db = linear_backward(dZ, cache)

# Print each gradient on its own labelled block for manual comparison.
print(f"dA_prev: {dA_prev}\n")
print(f"dW: {dW}\n")
print(f"db: {db}\n")

dA_prev: [[ 0.51822968 -0.19517421]
 [-0.40506361  0.15255393]
 [ 2.37496825 -0.89445391]]

dW: [[-0.10076895  1.40685096  1.64992505]]

db: [[ 0.50629448]]



In [17]:
def linear_activation_backward(dA, cache, activation):
    """
    Backward pass through one layer: activation gradient, then linear gradient.

    Arguments:
    dA -- gradient of the cost with respect to this layer's activations
    cache -- tuple (linear_cache, activation_cache) saved by the forward pass
    activation -- name of the activation used in this layer: 'sigmoid' or 'relu'

    Returns:
    dA_prev -- gradient with respect to the previous layer's activations
    dW -- gradient with respect to this layer's W
    db -- gradient with respect to this layer's b

    Raises:
    ValueError -- if `activation` is neither 'relu' nor 'sigmoid'
    """
    # Reject unknown names up front. Previously a typo left `dZ` unbound and
    # surfaced as an opaque UnboundLocalError further down.
    if activation not in ('relu', 'sigmoid'):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'")

    linear_cache, activation_cache = cache

    # `relu_backward` and `sigmoid_backward` are not defined in this notebook
    # (presumably they come from dnn_utils.activations); each maps
    # (dA, activation_cache) to dZ.
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [18]:
""" Test linear_activation_backward """

# The fixture's first value is bound to the name `AL`, but it is passed below
# as the `dA` argument (an upstream gradient).
# linear_activation_backward_test_case is not defined in this notebook;
# presumably it comes from test_utils.testCases -- verify.
AL, cache = linear_activation_backward_test_case()
dA_prev, dW, db = linear_activation_backward(AL, cache, 'sigmoid')

# Gradients for the sigmoid branch.
print(f"dA_prev: {dA_prev}\n")
print(f"dW: {dW}\n")
print(f"db: {db}\n")

# Same inputs through the ReLU branch; the three gradient names are overwritten.
dA_prev, dW, db = linear_activation_backward(AL, cache, 'relu')

# Gradients for the ReLU branch.
print(f"dA_prev: {dA_prev}\n")
print(f"dW: {dW}\n")
print(f"db: {db}\n")


dA_prev: [[ 0.11017994  0.01105339]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576154]]

dW: [[ 0.10266786  0.09778551 -0.01968084]]

db: [[-0.05729622]]

dA_prev: [[ 0.44090989 -0.        ]
 [ 0.37883606 -0.        ]
 [-0.2298228   0.        ]]

dW: [[ 0.44513824  0.37371418 -0.10478989]]

db: [[-0.20837892]]



In [19]:
def L_model_backward(AL, Y, caches):
    """
    Full backward pass: sigmoid output layer first, then the ReLU layers.

    Arguments:
    AL -- output of the forward pass
    Y -- true labels; reshaped to AL's shape before use
    caches -- list of per-layer caches from the forward pass, in forward order

    Returns:
    grads -- dict with, for each layer l = 1 .. L:
             'dA<l-1>' -- gradient with respect to the activations entering layer l
             'dW<l>'   -- gradient with respect to W<l>
             'db<l>'   -- gradient with respect to b<l>
    """
    grads = {}
    num_layers = len(caches)
    Y = Y.reshape(AL.shape)

    # Gradient of the cross-entropy cost with respect to the network output.
    dAL = - (np.divide(Y, AL) - np.divide(1-Y, 1-AL))

    # Output layer (sigmoid) uses the last cache.
    dA_prev, dW, db = linear_activation_backward(dAL, caches[num_layers - 1], 'sigmoid')
    grads[f'dA{num_layers - 1}'] = dA_prev
    grads[f'dW{num_layers}'] = dW
    grads[f'db{num_layers}'] = db

    # Remaining layers (ReLU), walking cache indices num_layers - 2 down to 0.
    for layer in range(num_layers - 2, -1, -1):
        dA_prev, dW, db = linear_activation_backward(grads[f'dA{layer + 1}'], caches[layer], 'relu')
        grads[f'dA{layer}'] = dA_prev
        grads[f'dW{layer + 1}'] = dW
        grads[f'db{layer + 1}'] = db

    return grads

In [20]:
""" Test L_model_backward """

# L_model_backward_test_case and print_grads are not defined in this notebook;
# presumably both come from test_utils.testCases -- verify.
AL, Y_access, caches = L_model_backward_test_case()
grads = L_model_backward(AL, Y_access, caches)

# Display the computed gradients for manual comparison.
print_grads(grads)

dW1 = [[ 0.41010002  0.07807203  0.13798444  0.10502167]
 [ 0.          0.          0.          0.        ]
 [ 0.05283652  0.01005865  0.01777766  0.0135308 ]]
db1 = [[-0.22007063]
 [ 0.        ]
 [-0.02835349]]
dA1 = [[ 0.12913162 -0.44014127]
 [-0.14175655  0.48317296]
 [ 0.01663708 -0.05670698]]


In [20]:
def update_parameters(parameters, grads, learning_rate):
    """
    Apply one gradient-descent step to every layer's weights and biases.

    Arguments:
    parameters -- dict with keys 'W1', 'b1', ..., 'WL', 'bL'
    grads -- dict with keys 'dW1', 'db1', ..., 'dWL', 'dbL'
    learning_rate -- step size multiplying each gradient

    Returns:
    parameters -- the same dict object that was passed in, with each entry
                  rebound to a new, updated value
    """
    # Each layer contributes one W entry and one b entry, hence the halving.
    num_layers = len(parameters) // 2

    for layer in range(1, num_layers + 1):
        w_key, b_key = f'W{layer}', f'b{layer}'
        # Plain `x = x - step` rebinds the dict entry to a fresh value rather
        # than modifying the existing array in place.
        parameters[w_key] = parameters[w_key] - learning_rate * grads[f'dW{layer}']
        parameters[b_key] = parameters[b_key] - learning_rate * grads[f'db{layer}']

    return parameters

In [23]:
# Note: this rebinds the notebook-level names `parameters` and `grads`.
# update_parameters_test_case is not defined in this notebook; presumably it
# comes from test_utils.testCases -- verify.
parameters, grads = update_parameters_test_case()
# One gradient-descent step with a learning rate of 0.1.
parameters = update_parameters(parameters, grads, 0.1)

# Print the updated parameters for manual comparison.
print(f"W1: {parameters['W1']}")
print(f"b1: {parameters['b1']}")
print(f"W2: {parameters['W2']}")
print(f"b2: {parameters['b2']}")

W1: [[-0.59562069 -0.09991781 -2.14584584  1.82662008]
 [-1.76569676 -0.80627147  0.51115557 -1.18258802]
 [-1.0535704  -0.86128581  0.68284052  2.20374577]]
b1: [[-0.04659241]
 [-1.28888275]
 [ 0.53405496]]
W2: [[-0.55569196  0.0354055   1.32964895]]
b2: [[-0.84610769]]
