In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

%matplotlib inline
np.random.seed(1)

In [2]:
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise to ``x``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def relu(x):
    """Rectified linear unit: element-wise maximum of ``0`` and ``x``."""
    return np.maximum(0, x)

def sigmoid_derivative(x):
    """Return the derivative of the logistic function evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``, so the
    result has whatever shape ``sigmoid(x)`` returns.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

def relu_derivative(x):
    """Return the element-wise derivative of ReLU evaluated at ``x``.

    The result is ``0`` where ``x < 0`` and ``1`` where ``x >= 0`` (the
    sub-gradient at exactly zero is taken to be 1).

    Parameters
    ----------
    x : array_like
        Pre-activation values.

    Returns
    -------
    numpy.ndarray
        New array with the same shape and dtype as ``x``; ``x`` itself is
        not modified.
    """
    x = np.asarray(x)
    # Build the mask from the untouched input. Zeroing the negatives first and
    # then testing ``>= 0`` on the same array would mark every entry as 1.
    return (x >= 0).astype(x.dtype)

In [3]:
def initialize_parameters(layer_dims):
    """Create weight and bias arrays for every pair of consecutive layer sizes.

    For layer ``i`` (1-based) with ``layer_dims[i-1]`` inputs and
    ``layer_dims[i]`` units, the returned dict holds:

    * ``"W<i>"`` - standard-normal samples of shape
      ``(layer_dims[i], layer_dims[i-1])`` drawn from NumPy's global RNG;
    * ``"b<i>"`` - zeros of shape ``(layer_dims[i], 1)``.
    """
    parameters = {}
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for layer, (n_in, n_out) in enumerate(size_pairs, start=1):
        parameters[f"W{layer}"] = np.random.randn(n_out, n_in)
        parameters[f"b{layer}"] = np.zeros((n_out, 1))
    return parameters

In [4]:
def layer_forward(A_prev, W, b, activation):
    """Run one layer forward: ``Z = W . A_prev + b`` followed by an activation.

    Parameters
    ----------
    A_prev : numpy.ndarray
        Input to this layer (presumably shaped ``(n_prev, m)`` - confirm
        against the caller).
    W, b : numpy.ndarray
        Weights and bias of this layer; ``b`` is added to ``np.dot(W, A_prev)``
        with normal NumPy broadcasting.
    activation : str
        Either ``'sigmoid'`` or ``'relu'``.

    Returns
    -------
    A : numpy.ndarray
        Activated output of the layer.
    cache : tuple
        ``(A_prev, W, b, Z)``, i.e. the values ``layer_backward`` unpacks.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Fail fast with a clear message instead of hitting an unbound local later.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'"
        )

    Z = np.dot(W, A_prev) + b

    if activation == 'sigmoid':
        A = sigmoid(Z)
    else:
        A = relu(Z)

    cache = (A_prev, W, b, Z)
    return A, cache

In [5]:
def model_forward(X, parameters):
    """Propagate ``X`` through every layer described by ``parameters``.

    ``parameters`` is expected to hold one ``"W<i>"`` and one ``"b<i>"`` entry
    per layer, so the layer count is ``len(parameters) // 2``. Layers
    ``1 .. depth-1`` use ReLU; the final layer uses sigmoid.

    Returns the final activation and a list with one cache per layer, in
    forward order.
    """
    depth = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers: ReLU.
    for layer in range(1, depth):
        weights = parameters[f"W{layer}"]
        bias = parameters[f"b{layer}"]
        activation, layer_cache = layer_forward(activation, weights, bias, "relu")
        caches.append(layer_cache)

    # Output layer: sigmoid.
    output, output_cache = layer_forward(
        activation, parameters[f"W{depth}"], parameters[f"b{depth}"], "sigmoid"
    )
    caches.append(output_cache)

    return output, caches

In [6]:
def compute_cost(A, Y):
    """Binary cross-entropy between predictions ``A`` and labels ``Y``.

    Computes ``-(1/m) * sum(Y*log(A) + (1-Y)*log(1-A))`` where
    ``m = Y.shape[1]``.

    Parameters
    ----------
    A : array_like
        Predicted probabilities; must broadcast against ``Y``.
    Y : numpy.ndarray
        Labels; ``Y.shape[1]`` is used as the averaging count.

    Returns
    -------
    numpy.ndarray
        The cost after ``np.squeeze`` (a 0-d array for scalar results).
    """
    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1): a saturated prediction would
    # otherwise yield log(0) = -inf, and 0 * -inf = nan for a *correct* one.
    A = np.asarray(A, dtype=float)
    eps = np.finfo(A.dtype).eps
    A = np.clip(A, eps, 1 - eps)

    log_likelihood = np.sum(Y * np.log(A)) + np.sum((1 - Y) * np.log(1 - A))
    cost = -log_likelihood / m
    return np.squeeze(cost)

In [10]:
def layer_backward(dA, cache, activation):
    """Back-propagate through one layer.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to this layer's activation output.
        ``dA.shape[1]`` is used as the averaging count ``m``.
    cache : tuple
        ``(A_prev, W, b, Z)`` as produced by ``layer_forward``.
    activation : str
        Either ``'sigmoid'`` or ``'relu'``; selects which derivative is applied
        to ``Z``.

    Returns
    -------
    dA_prev : numpy.ndarray
        ``W.T . dZ`` - gradient with respect to the layer input.
    dW : numpy.ndarray
        ``(1/m) * dZ . A_prev.T``.
    db : numpy.ndarray
        ``(1/m) * sum(dZ, axis=1)`` with the summed axis kept.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Fail fast with a clear message instead of hitting an unbound local later.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'"
        )

    A_prev, W, _, Z = cache  # the bias is cached but not needed here
    m = dA.shape[1]

    if activation == 'sigmoid':
        dZ = dA * sigmoid_derivative(Z)
    else:
        dZ = dA * relu_derivative(Z)

    dW = 1 / m * np.dot(dZ, A_prev.T)
    db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

In [11]:
def model_backward(AL, Y, caches):
    """Back-propagate through the whole network and collect the gradients.

    The last entry of ``caches`` is treated as the sigmoid output layer; every
    earlier entry is treated as a ReLU layer.

    Parameters
    ----------
    AL : array_like
        Output of the final (sigmoid) layer.
    Y : numpy.ndarray
        Labels; must broadcast against ``AL``.
    caches : list
        One cache per layer in forward order, as returned by ``model_forward``.

    Returns
    -------
    dict
        ``"dW<i>"`` and ``"db<i>"`` for every layer ``i`` in
        ``1 .. len(caches)``.
    """
    grads = {}
    num_layers = len(caches)

    # Keep AL strictly inside (0, 1) so the divisions below cannot produce
    # inf/nan when the output unit saturates.
    AL = np.asarray(AL, dtype=float)
    eps = np.finfo(AL.dtype).eps
    AL = np.clip(AL, eps, 1 - eps)

    # Derivative of the cross-entropy cost with respect to AL.
    dA = -(Y / AL - (1 - Y) / (1 - AL))

    # Output layer (sigmoid).
    dA, dW, db = layer_backward(dA, caches[-1], 'sigmoid')
    grads["dW" + str(num_layers)] = dW
    grads["db" + str(num_layers)] = db

    # Hidden layers (ReLU), walking backwards from the second-to-last layer.
    # Each step must use that layer's own cache, and must not touch the
    # output layer's gradients stored above.
    for i in range(num_layers - 2, -1, -1):
        dA, dW, db = layer_backward(dA, caches[i], 'relu')
        grads["dW" + str(i + 1)] = dW
        grads["db" + str(i + 1)] = db

    return grads

In [12]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    Parameters
    ----------
    parameters : dict
        Holds ``"W<i>"`` / ``"b<i>"`` arrays; the layer count is
        ``len(parameters) // 2``.
    grads : dict
        Holds the matching ``"dW<i>"`` / ``"db<i>"`` arrays.
    learning_rate : float
        Step size multiplied into each gradient.

    Returns
    -------
    dict
        The same ``parameters`` object. Its arrays are updated in place with
        ``-=``, so the caller's dict and arrays are modified.
    """
    # The layer count comes from the number of entries (one W and one b each).
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        suffix = str(layer)
        parameters["W" + suffix] -= learning_rate * grads["dW" + suffix]
        parameters["b" + suffix] -= learning_rate * grads["db" + suffix]

    return parameters