In [19]:
import numpy as np
from helpers import *

In [21]:
def initialize_parameters(layers_size):
    """
    Creates the initial weights and biases for every layer of the network.

    layers_size: sequence of layer widths, input layer first.

    Returns a dict with keys 'W1', 'b1', 'W2', 'b2', ... where
    'Wl' has shape (layers_size[l], layers_size[l-1]) and is filled with
    standard-normal samples scaled by 0.01, and 'bl' is a zero column of
    shape (layers_size[l], 1).
    """

    parameters = {}
    # Walk consecutive (input width, output width) pairs; layer numbering starts at 1.
    fan_pairs = zip(layers_size[:-1], layers_size[1:])
    for layer_index, (n_in, n_out) in enumerate(fan_pairs, start=1):
        weight_key = 'W' + str(layer_index)
        bias_key = 'b' + str(layer_index)
        parameters[weight_key] = np.random.randn(n_out, n_in) * 0.01
        parameters[bias_key] = np.zeros((n_out, 1))

    return parameters

In [24]:
def linearize_and_activate_forward_unit(A_previous, W, b, activation_function='relu'):
    """
    Runs one layer forward: Z = W . A_previous + b, then applies the activation.

    A_previous: activations fed into this layer.
    W, b: weights and bias of this layer.
    activation_function: 'relu' (default) or 'sigmoid'.

    Returns A, layer_parameters where layer_parameters is a dict with
    'W', 'b', 'Z' and 'A'. Note that 'A' stores the layer INPUT
    (A_previous), which is what the backward pass reads.

    Raises ValueError for any other activation name.
    """

    Z = np.dot(W, A_previous) + b
    # relu / sigmoid are not defined in this notebook; presumably they come
    # from the `helpers` star-import -- TODO confirm.
    if activation_function == 'relu':
        A = relu(Z)
    elif activation_function == 'sigmoid':
        A = sigmoid(Z)
    else:
        # Fail loudly instead of hitting an unbound `A` further down.
        raise ValueError(
            "Unsupported activation_function: %r (expected 'relu' or 'sigmoid')"
            % (activation_function,)
        )

    layer_parameters = {
        'W': W,
        'b': b,
        'Z': Z,
        'A': A_previous
    }

    return A, layer_parameters

In [34]:
def deep_forward(X, parameters):
    """
    Runs the full forward pass: 'relu' on every layer except the last,
    'sigmoid' on the last one.

    X: network input.
    parameters: dict with keys 'W1', 'b1', ..., 'WL', 'bL'
        (the layout produced by initialize_parameters).

    Returns A_last, layers_parameters where layers_parameters is a list with
    one per-layer dict (as returned by linearize_and_activate_forward_unit),
    ordered from the first layer to the last.

    Raises ValueError when parameters holds no layer at all.
    """
    # Each layer contributes one W and one b entry.
    n_of_layers = len(parameters) // 2
    if n_of_layers < 1:
        raise ValueError("parameters must describe at least one layer")

    A = X
    layers_parameters = []

    # Hidden layers: 1 .. L-1
    for l in range(1, n_of_layers):
        W = parameters['W' + str(l)]
        b = parameters['b' + str(l)]
        A, params = linearize_and_activate_forward_unit(A, W, b, 'relu')
        layers_parameters.append(params)

    # Last layer: L (not L-1, which the loop above already consumed)
    W = parameters['W' + str(n_of_layers)]
    b = parameters['b' + str(n_of_layers)]
    A_last, params = linearize_and_activate_forward_unit(A, W, b, 'sigmoid')
    layers_parameters.append(params)

    return A_last, layers_parameters

In [35]:
def calculate_cost(Y_hat, Y):
    """
    Returns the cross-entropy cost averaged over the m = Y.shape[1] examples:

        cost = -(1/m) * sum(Y * log(Y_hat) + (1 - Y) * log(1 - Y_hat))

    Y_hat: predicted probabilities.
    Y: labels; reshaped to Y_hat's shape before use, so the two only
       need to hold the same number of elements.
    """
    m = Y.shape[1]
    # Align shapes so the product below is strictly elementwise
    # (the same alignment deep_backward applies to Y).
    Y = Y.reshape(Y_hat.shape)

    # Elementwise product: np.dot on two same-shaped (1, m) arrays is a
    # matrix product and fails with a shape mismatch for m > 1.
    log_probs = np.multiply(Y, np.log(Y_hat)) + np.multiply(1 - Y, np.log(1 - Y_hat))
    cost = -(1/m) * np.sum(log_probs)

    return cost

In [36]:
def propagate_back_activation(dA, Z, activation):
    """
    Returns dZ: dA multiplied elementwise by the activation derivative at Z.

    dA: gradient of the cost with respect to this layer's activation output.
    Z: pre-activation values stored during the forward pass.
    activation: 'relu' or 'sigmoid'.

    Raises ValueError for any other activation name.
    """
    # relu_derivative / sigmoid_derivative are not defined in this notebook;
    # presumably they come from the `helpers` star-import -- TODO confirm.
    if activation == 'relu':
        dZ = np.multiply(dA, relu_derivative(Z))
    elif activation == 'sigmoid':
        dZ = np.multiply(dA, sigmoid_derivative(Z))
    else:
        # Fail loudly instead of hitting an unbound `dZ` at the return.
        raise ValueError(
            "Unsupported activation: %r (expected 'relu' or 'sigmoid')" % (activation,)
        )

    return dZ

In [37]:
# Add A_prev, W, b to the cache
def propagate_back_linear(layer_parameters, dZ):
    """
    Returns dA, dW, db for the linear part of one layer.

    layer_parameters: per-layer dict from the forward pass; only 'W' and
        'A' (the activations that were fed INTO the layer) are read here.
    dZ: gradient of the cost with respect to this layer's Z.

    With m = number of columns of the layer input:
        dW = (1/m) * dZ . A_previous^T
        db = (1/m) * row-wise sum of dZ (kept as a column)
        dA_previous = W^T . dZ
    """
    W = layer_parameters['W']
    A_previous = layer_parameters['A']
    m = A_previous.shape[1]

    dW = (1/m) * np.dot(dZ, A_previous.T)
    # keepdims=True keeps db as a column rather than a flat vector.
    db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    dA_previous = np.dot(W.T, dZ)

    return dA_previous, dW, db

In [38]:
def backward_propagation_unit(dA, layer_parameters, activation):
    """
    Runs one layer backward: activation step first, then the linear step.

    dA: gradient of the cost with respect to this layer's activation output.
    layer_parameters: per-layer dict from the forward pass ('W', 'b', 'Z', 'A').
    activation: activation name used by this layer in the forward pass.

    Returns dA_previous, dW, db.
    """
    Z = layer_parameters['Z']
    dZ = propagate_back_activation(dA, Z, activation)
    # propagate_back_linear takes the whole per-layer dict plus dZ,
    # not the unpacked A_previous / W / b.
    dA_previous, dW, db = propagate_back_linear(layer_parameters, dZ)

    return dA_previous, dW, db

In [33]:
def deep_backward(AL, Y, layer_parameters):
    """
    Runs the full backward pass: 'sigmoid' for the last layer, 'relu' for
    every layer before it.

    AL: output of the forward pass.
    Y: labels; reshaped to AL's shape before use.
    layer_parameters: list of per-layer dicts (W, b, Z, A) collected by the
        forward pass, ordered from the first layer to the last.

    Returns a dict of gradients. For a network with L layers it holds
    'dW1'..'dWL', 'db1'..'dbL' and 'dA0'..'dA(L-1)', where 'dAk' is the
    gradient with respect to the input of layer k+1.

    Raises ValueError when layer_parameters is empty.
    """
    layers_number = len(layer_parameters)
    if layers_number < 1:
        raise ValueError("layer_parameters must hold at least one layer")

    Y = Y.reshape(AL.shape)
    gradients = {}

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer: uses the LAST per-layer dict, not the whole list.
    gradients['dA' + str(layers_number-1)], \
    gradients['dW' + str(layers_number)], \
    gradients['db' + str(layers_number)] = backward_propagation_unit(dAL,
                                                                     layer_parameters[-1],
                                                                     'sigmoid')

    # Remaining layers, walked from L-1 down to 1 (list indices L-2 .. 0).
    for l in reversed(range(layers_number-1)):
        current_layer_params = layer_parameters[l]
        # 'dA(l+1)' was written by the step that handled the layer above.
        gradients['dA' + str(l)], \
        gradients['dW' + str(l+1)], \
        gradients['db' + str(l+1)] = backward_propagation_unit(gradients['dA' + str(l+1)],
                                                               current_layer_params,
                                                               'relu')
    return gradients

In [None]:
def update_weights(parameters, gradients, learning_rate):
    """
    Applies one gradient-descent step to every layer:

        W[l] = W[l] - learning_rate * dW[l]
        b[l] = b[l] - learning_rate * db[l]

    parameters: dict with keys 'W1', 'b1', ..., 'WL', 'bL'.
    gradients: dict with matching 'dW1', 'db1', ..., 'dWL', 'dbL' entries
        (extra keys such as 'dA0' are ignored).
    learning_rate: step size.

    The entries of `parameters` are rebound to the updated values and the
    same dict is returned for convenience.
    """
    # Each layer contributes one W and one b entry.
    n_of_layers = len(parameters) // 2

    for l in range(n_of_layers):
        layer = str(l+1)
        # Rebind rather than update in place, so arrays still referenced
        # elsewhere (e.g. by forward-pass caches) are left untouched.
        parameters['W' + layer] = parameters['W' + layer] - learning_rate * gradients['dW' + layer]
        parameters['b' + layer] = parameters['b' + layer] - learning_rate * gradients['db' + layer]

    return parameters