In [1]:
# Packages
import numpy as np
from testCases import *
from gc_utils import sigmoid, relu, dictionary_to_vector, vector_to_dictionary, gradients_to_vector

In [54]:
# n dimensional gradient checking

def forward_prop(x, y, parameters):
    """Run the forward pass of the three-layer network and compute its cost.

    The layers are applied as affine -> relu -> affine -> relu -> affine ->
    sigmoid, using the ``relu`` and ``sigmoid`` helpers imported from
    ``gc_utils``. The cost is the cross-entropy between ``y`` and the final
    activation, summed over every entry and divided by ``x.shape[1]``.

    Arguments:
    x -- input array; ``x.shape[1]`` is used as the divisor of the cost
    y -- labels, combined element-wise with the final activation
    parameters -- dict providing "W1", "W2", "W3", "b1", "b2" and "b3"

    Returns:
    cache -- dict with the pre-activations "z1", "z2", "z3" and the
             activations "a1", "a2", "a3"
    cost -- the averaged cross-entropy cost
    """
    weight_1, weight_2, weight_3 = (parameters[key] for key in ("W1", "W2", "W3"))
    bias_1, bias_2, bias_3 = (parameters[key] for key in ("b1", "b2", "b3"))

    # First hidden layer.
    z1 = np.dot(weight_1, x) + bias_1
    a1 = relu(z1)

    # Second hidden layer.
    z2 = np.dot(weight_2, a1) + bias_2
    a2 = relu(z2)

    # Output layer.
    z3 = np.dot(weight_3, a2) + bias_3
    a3 = sigmoid(z3)

    # Cross-entropy, split into the y == 1 and y == 0 contributions.
    example_count = x.shape[1]
    positive_term = np.multiply(y, -np.log(a3))
    negative_term = np.multiply(1 - y, -np.log(1 - a3))
    cost = np.sum(positive_term + negative_term) / example_count

    cache = {
        "z1": z1,
        "z2": z2,
        "z3": z3,
        "a1": a1,
        "a2": a2,
        "a3": a3,
    }
    return cache, cost


In [55]:
# Run one forward pass on the (x, y, parameters) triple returned by
# gradient_check_n_test_case() and show the resulting cost.
cache, cost = forward_prop(*gradient_check_n_test_case())
print(cost)

2.407833389834357


In [56]:
def backprop(x, y, parameters, cache):
    """Compute the gradients of the cost for the three-layer network.

    Mirrors ``forward_prop``: a sigmoid output layer with cross-entropy cost
    (so ``dz3 = a3 - y``) preceded by two ReLU layers, whose derivative is
    applied by zeroing the upstream gradient wherever the pre-activation
    is <= 0.

    Only the entries that are actually needed are read: "W2" and "W3" from
    ``parameters`` and "z1", "z2", "a1", "a2", "a3" from ``cache``. Extra
    entries (biases, "W1", "z3") are accepted and ignored.

    The shape notes in the comments below are the ones annotated by the
    original author, presumably for the gradient_check_n_test_case() network.

    Arguments:
    x -- input array; ``x.shape[1]`` is the number of examples m
    y -- labels, same shape as ``cache["a3"]``
    parameters -- dict containing at least "W2" and "W3"
    cache -- dict produced by the forward pass

    Returns:
    gradients -- dict with "dW1", "dW2", "dW3", "db1", "db2", "db3"
    """
    m = x.shape[1]
    z1, z2 = cache["z1"], cache["z2"]
    a1 = cache["a1"]            # 5,3
    a2 = cache["a2"]            # 3,3
    a3 = cache["a3"]            # 1,3

    w2 = parameters["W2"]
    w3 = parameters["W3"]

    # Output layer: sigmoid + cross-entropy collapses to (a3 - y).
    dz3 = a3 - y                        # 1,3

    # Second hidden layer: propagate through W3, then through the ReLU.
    da2 = np.dot(w3.T, dz3)             # 3,3
    dz2 = np.copy(da2)                  # copy so da2 is left untouched
    dz2[z2 <= 0] = 0

    # First hidden layer: propagate through W2, then through the ReLU.
    da1 = np.dot(w2.T, dz2)             # 5,3
    dz1 = np.copy(da1)
    dz1[z1 <= 0] = 0

    # Average the per-example contributions over the m examples.
    dw3 = 1. / m * np.dot(dz3, a2.T)                       # 1,3
    db3 = 1. / m * np.sum(dz3, axis=1, keepdims=True)      # 1,1

    dw2 = 1. / m * np.dot(dz2, a1.T)                       # 3,5
    db2 = 1. / m * np.sum(dz2, axis=1, keepdims=True)      # 3,1

    dw1 = 1. / m * np.dot(dz1, x.T)                        # 5,4
    db1 = 1. / m * np.sum(dz1, axis=1, keepdims=True)      # 5,1

    gradients = {
        "dW3": dw3,
        "dW2": dw2,
        "dW1": dw1,
        "db1": db1,
        "db2": db2,
        "db3": db3,
    }
    return gradients








In [57]:
def update_parameters(parameters, caches, learning_rate):
    """Apply one gradient-descent step and return the updated parameters.

    Every entry ``parameters[name]`` is paired with the gradient stored under
    ``caches["d" + name]``. A new dict is returned; the ``parameters`` dict
    passed in is left unmodified so that re-running a notebook cell does not
    silently change state owned by the caller.

    Arguments:
    parameters -- dict mapping parameter names (e.g. "W1", "b1") to arrays
    caches -- dict of gradients keyed by "d" + parameter name
    learning_rate -- step size multiplying each gradient

    Returns:
    dict with the same keys (and key order) as ``parameters`` holding the
    updated values

    Raises:
    KeyError -- if a gradient is missing for one of the parameters
    """
    return {
        name: value - learning_rate * caches["d" + name]
        for name, value in parameters.items()
    }

In [58]:
def model(num_iterations, learning_rate):
    """Train the network with plain gradient descent and sample the cost.

    The inputs, labels and starting parameters all come from
    gradient_check_n_test_case(). Each iteration runs a forward pass,
    backpropagation and one parameter update.

    Arguments:
    num_iterations -- number of gradient-descent iterations to run
    learning_rate -- step size handed to update_parameters

    Returns:
    list of costs recorded at iterations 0, 100, 200, ... (each measured
    before that iteration's parameter update)
    """
    x, y, parameters = gradient_check_n_test_case()
    cost_history = []

    for step in range(num_iterations):
        cache, cost = forward_prop(x, y, parameters)

        # Record the cost once every 100 iterations.
        if step % 100 == 0:
            cost_history.append(cost)

        gradients = backprop(x, y, parameters, cache)
        parameters = update_parameters(parameters, gradients, learning_rate)

    return cost_history

    

In [59]:
# Train for 2000 iterations with learning rate 0.001 and print the costs that
# model() samples every 100 iterations.
print(model(2000 , 0.001))

[2.407833389834357, 1.5687398899693203, 1.1386467367037953, 0.919205250718965, 0.8024691337687141, 0.734419456265721, 0.6899210989758918, 0.6569132533111964, 0.6291600798954751, 0.6031325200473504, 0.5766747041094027, 0.5484446891719149, 0.5325103415774931, 0.5207744543832705, 0.5091106833983515, 0.49753727803599607, 0.4862128517814133, 0.4751962335236111, 0.4644441399076767, 0.4540579433655954]


In [60]:
# def calculate_gradient(cost, epsilon):

def gradient_checking(x, y, parameters, gradients, epsilon=1e-3, threshold=2e-7):
    """Compare backprop gradients with a central-difference approximation.

    Each parameter theta_i is perturbed by +epsilon and -epsilon in turn, the
    cost is recomputed with ``forward_prop`` and the slope
    (J(theta_i + eps) - J(theta_i - eps)) / (2 * eps) is taken as the numerical
    gradient. The returned value is the relative difference

        ||grad - gradapprox|| / (||grad|| + ||gradapprox||)

    The flatten/unflatten helpers come from ``gc_utils``; this function
    presumes (TODO confirm) that ``dictionary_to_vector`` returns a column
    vector of shape (n, 1) whose ordering matches ``gradients_to_vector`` and
    that ``vector_to_dictionary`` inverts it.

    Arguments:
    x, y -- inputs and labels forwarded unchanged to ``forward_prop``
    parameters -- parameter dict at which the gradient is checked
    gradients -- gradient dict produced by ``backprop`` for ``parameters``
    epsilon -- perturbation size for the finite difference
    threshold -- largest relative difference still reported as correct

    Returns:
    difference -- the relative difference described above; NaN if the cost or
                  the gradients contain NaN

    NOTE(review): the default epsilon of 1e-3 is much larger than the 1e-7
    usually recommended for this check; it is kept for backward compatibility.
    """
    grad = gradients_to_vector(gradients)
    theta, _ = dictionary_to_vector(parameters)
    num_parameters = theta.shape[0]
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        # Cost with only the i-th parameter nudged upwards.
        theta_plus = np.copy(theta)
        theta_plus[i, 0] += epsilon
        _, cost_plus = forward_prop(x, y, vector_to_dictionary(theta_plus))

        # Cost with only the i-th parameter nudged downwards.
        theta_minus = np.copy(theta)
        theta_minus[i, 0] -= epsilon
        _, cost_minus = forward_prop(x, y, vector_to_dictionary(theta_minus))

        gradapprox[i] = (cost_plus - cost_minus) / (2 * epsilon)

    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)

    if denominator == 0:
        # Both gradient vectors are exactly zero, hence identical; avoid 0/0.
        difference = 0.0
    else:
        difference = numerator / denominator

    # Written as "<=" so that a NaN difference falls into the failure branch
    # instead of being reported as a successful check.
    if difference <= threshold:
        print("the back propagation is correctly implemented")
    else:
        print("there is a problem in back propagation algorithm")

    return difference

    

In [61]:
# Rebuild the test case, then compute the analytic gradients at its parameters.
x, y, parameters = gradient_check_n_test_case()
cache , cost = forward_prop(x, y, parameters)
gradients = backprop(x,y, parameters, cache)

# Compare the backprop gradients with the numerical approximation; the
# returned value is the relative difference between the two.
diff = gradient_checking(x, y, parameters, gradients)
print(diff , "diff")

the back propagation is correctly implemented
1.6797905539453286e-08 diff
