In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v4 import *
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)

In [2]:
def initialize_parameters(n_x,n_h,n_y):
    np.random.seed(1)
    w1 = np.random.randn(n_h,n_x) * 0.01
    b1 = np.zeros(shape=(n_h,1))
    w2 = np.random.randn(n_y,n_h) * 0.01
    b2 = np.zeros(shape=(n_y,1))
    parameters = {"w1": w1,
                  "w2": w2,
                  "b1": b1,
                  "b2": b2}
    return parameters

In [3]:
parameters = initialize_parameters(2,2,1)
print("W1 = " + str(parameters["w1"]))
print("W2 = " + str(parameters["w2"]))
print("b1 = " + str(parameters["b1"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 0.01624345 -0.00611756]
 [-0.00528172 -0.01072969]]
W2 = [[ 0.00865408 -0.02301539]]
b1 = [[0.]
 [0.]]
b2 = [[0.]]


In [4]:
def initialize_parameters_deep(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    for l in range(1,L):
        parameters["w"+str(l)] = np.random.randn(layer_dims[1], layer_dims[l-1]) * 0.01
        parameters["b"+str(l)] = np.zeros((layer_dims[l],1))
    return parameters

In [5]:
parameters = initialize_parameters_deep([5,4,3])
print("W1 = " + str(parameters["w1"]))
print("W2 = " + str(parameters["w2"]))
print("b1 = " + str(parameters["b1"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 0.01788628  0.0043651   0.00096497 -0.01863493 -0.00277388]
 [-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
 [-0.01313865  0.00884622  0.00881318  0.01709573  0.00050034]
 [-0.00404677 -0.0054536  -0.01546477  0.00982367 -0.01101068]]
W2 = [[-0.01185047 -0.0020565   0.01486148  0.00236716]
 [-0.01023785 -0.00712993  0.00625245 -0.00160513]
 [-0.00768836 -0.00230031  0.00745056  0.01976111]
 [-0.01244123 -0.00626417 -0.00803766 -0.02419083]]
b1 = [[0.]
 [0.]
 [0.]
 [0.]]
b2 = [[0.]
 [0.]
 [0.]]


In [6]:
def linear_forward(a,w,b):
    z = np.dot(w,a) + b
    cache = (a,w,b)
    return z, cache

In [7]:
a,w,b = linear_forward_test_case()
z,linear_cache = linear_forward(a,w,b)
print("z = " + str(z))

z = [[ 3.26295337 -1.23429987]]


In [8]:
def linear_activation_forward(a_prev, w, b, activation):
    if activation == "sigmoid":
        z, linear_cache = linear_forward(a_prev,w,b)
        a, activation_cache = sigmoid(z)
    elif activation == "relu":
        z, linear_cache = linear_forward(a_prev,w,b)
        a, activation_cache = relu(z)
    cache = (linear_cache, activation_cache)
    return a, cache

In [9]:
a_prev, w,b = linear_activation_forward_test_case()
a, linear_activation_cache = linear_activation_forward(a_prev, w, b,activation="sigmoid")
print("With sigmoid: a = "+ str(a))
a, linear_activation_cache = linear_activation_forward(a_prev, w, b,activation="relu")
print("With ReLU: a = "+ str(a))


With sigmoid: a = [[0.96890023 0.11013289]]
With ReLU: a = [[3.43896131 0.        ]]


In [10]:
def L_model_forward(x, parameters):
    caches = []
    a = x
    L = len(parameters) // 2
    for l in range(1,L):
        a_prev = a
        a, cache = linear_activation_forward(a_prev,
                                             parameters["W" + str(l)],
                                             parameters["b" + str(l)],
                                             activation='relu')
        caches.append(cache)
        
    al, cache = linear_activation_forward(a,
                                          parameters["W" + str(L)],
                                          parameters["b" + str(L)],
                                          activation="sigmoid")
    caches.append(cache)
    return al, caches

In [11]:
x, parameters = L_model_forward_test_case()
al, caches = L_model_forward(x, parameters)
print("AL = " + str(al))
print("Length of caches list = "+ str(len(caches)))

AL = [[0.17007265 0.2524272 ]]
Length of caches list = 2


In [12]:
def compute_cost(al ,y):
    m = y.shape[1]
    cost = (-1/m) * np.sum(np.multiply(y,np.log(al)) + np.multiply(1-y, np.log(1-al)))
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost

In [13]:
y, al = compute_cost_test_case()
print("cost = " + str(compute_cost(al,y)))

cost = 0.41493159961539694


In [18]:
def linear_backward(dz, cache):
    a_prev, w, b = cache
    m = a_prev.shape[1]
    dw = np.dot(dz,cache[0].T)/m
    db = np.sum(dz,axis = 1, keepdims=True)/m
    da_prev = np.dot(cache[1].T,dz)
    print(db.shape,b.shape)
    assert (da_prev.shape == a_prev.shape)
    assert (dw.shape == w.shape)
    assert (db.shape == b.shape)
    
    return da_prev, dw, db

In [19]:
dz, linear_cache = linear_backward_test_case()
da_prev, dw, db = linear_backward(dz, linear_cache)
print("da_prev = " + str(da_prev))
print("dw = " + str(dw))
print("db = " + str(db))

(1, 1) (1, 1)
da_prev = [[ 0.51822968 -0.19517421]
 [-0.40506361  0.15255393]
 [ 2.37496825 -0.89445391]]
dw = [[-0.10076895  1.40685096  1.64992505]]
db = [[0.50629448]]


In [20]:
def linear_activation_backward(da, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dz = relu_backward(da, activation_cache)
    elif activation == "sigmoid":
        dz = sigmoid_backward(da, activation_cache)
    da_prev, dw, db = linear_backward(dz, linear_cache)
    return da_prev, dw, db

In [21]:
al, linear_activation_cache = linear_activation_backward_test_case()
da_prev , dw, db = linear_activation_backward(al, linear_activation_cache, activation = "sigmoid")
print("Sigmoid:")
print("da_prev = " + str(da_prev))
print("dw = " + str(dw))
print("db = " + str(db))
da_prev , dw, db = linear_activation_backward(al,linear_activation_cache, activation = "relu")
print("ReLU:")
print("da_prev = " + str(da_prev))
print("dw = " + str(dw))
print("db = " + str(db))


(1, 1) (1, 1)
Sigmoid:
da_prev = [[ 0.11017994  0.01105339]
 [ 0.09466817  0.00949723]
 [-0.05743092 -0.00576154]]
dw = [[ 0.10266786  0.09778551 -0.01968084]]
db = [[-0.05729622]]
(1, 1) (1, 1)
ReLU:
da_prev = [[ 0.44090989 -0.        ]
 [ 0.37883606 -0.        ]
 [-0.2298228   0.        ]]
dw = [[ 0.44513824  0.37371418 -0.10478989]]
db = [[-0.20837892]]


In [22]:
def L_model_backward(al, y, caches):
    grads = {}
    L = len(caches)
    m = al.shape[1]
    #y = y.reshape[al.shape]
    dal = - (np.divide(y,al) - np.divide(1-y,1-al))
    current_cache = caches[-1]
    grads["da" + str(L-1)],grads["dw" + str(L)],grads["db" + str(L)] = linear_activation_backward(dal,current_cache,activation = "sigmoid")
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        da_prev_temp, dw_temp, db_temp = linear_activation_backward(grads["da" + str(l + 1)], current_cache, activation="relu")
        grads["da" + str(l+1)] = da_prev_temp
        grads["dw" + str(l+1)] = dw_temp
        grads["db" + str(l+1)] = db_temp
    return grads

In [23]:
al, y_assess, caches = L_model_backward_test_case()
grads = L_model_backward(al, y_assess, caches)
print ("dW1 = "+ str(grads["dw1"]))
print ("db1 = "+ str(grads["db1"]))
print ("dA1 = "+ str(grads["da1"]))

(1, 1) (1, 1)
(3, 1) (3, 1)
dW1 = [[0.41010002 0.07807203 0.13798444 0.10502167]
 [0.         0.         0.         0.        ]
 [0.05283652 0.01005865 0.01777766 0.0135308 ]]
db1 = [[-0.22007063]
 [ 0.        ]
 [-0.02835349]]
dA1 = [[ 0.          0.52257901]
 [ 0.         -0.3269206 ]
 [ 0.         -0.32070404]
 [ 0.         -0.74079187]]


In [30]:
def update_parameters(parameters,grads , learning_rate):
    L = len(parameters) //2
    for l in range(L):
        parameters["w" + str(l+1)] = parameters["w" + str(l+1)] - learning_rate * grads["dw"+str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db"+str(l+1)]
    return parameters

In [31]:
parameters, grads = update_parameters_test_case()
parameters, update_parameters(parameters,grads, 0.1)
print ("W1 = " + str(parameters["w1"]))
print ("b1 = " + str(parameters["b1"]))
print ("W2 = " + str(parameters["w2"]))
print ("b2 = " + str(parameters["b2"]))

W1 = [[-0.59562069 -0.09991781 -2.14584584  1.82662008]
 [-1.76569676 -0.80627147  0.51115557 -1.18258802]
 [-1.0535704  -0.86128581  0.68284052  2.20374577]]
b1 = [[-0.04659241]
 [-1.28888275]
 [ 0.53405496]]
W2 = [[-0.55569196  0.0354055   1.32964895]]
b2 = [[-0.84610769]]
