In [12]:
import numpy as np
from IPython.display import Image

In [2]:
# Layer-by-layer description of the network. Each entry records the layer's
# input width, its output width, and the name of its activation function;
# every layer's input width equals the previous layer's output width.
NN_ARCHITECTURE = [
    dict(input_dim=n_in, output_dim=n_out, activation=act_name)
    for n_in, n_out, act_name in (
        (2, 25, "relu"),
        (25, 50, "relu"),
        (50, 50, "relu"),
        (50, 25, "relu"),
        (25, 1, "sigmoid"),
    )
]

In [5]:
# Function for initializing our NN weights and biases
def init_layers(nn_architecture, seed=101):
    """Create small random weights and biases for every layer of the network.

    Args:
        nn_architecture: Sequence of layer descriptions. Each entry is a
            mapping with integer ``"input_dim"`` and ``"output_dim"`` values.
        seed: Value passed to ``np.random.seed`` so that initialisation is
            reproducible. Note that this reseeds NumPy's global generator.

    Returns:
        dict: For each layer ``i`` (1-based), ``"W{i}"`` maps to an array of
        shape ``(output_dim, input_dim)`` and ``"b{i}"`` maps to an array of
        shape ``(output_dim, 1)``. All values are standard-normal samples
        scaled by 0.1.
    """
    np.random.seed(seed)
    params_values = {}

    for layer_idx, layer in enumerate(nn_architecture, start=1):
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]

        params_values[f"W{layer_idx}"] = (
            np.random.randn(layer_output_size, layer_input_size) * 0.1
        )
        # The key is a lowercase "b" because full_forward_propagation looks the
        # bias up as f"b{layer_idx}". The bias is a column vector so that it
        # broadcasts over the example axis (columns) when added to W @ A.
        params_values[f"b{layer_idx}"] = (
            np.random.randn(layer_output_size, 1) * 0.1
        )

    return params_values
        

In [7]:
# Activation functions and their respective derivatives:

def sigmoid(Z):
    """Apply the logistic function 1 / (1 + e^(-Z)) element-wise to Z."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def relu(Z):
    """Apply the rectified linear unit element-wise: max(0, Z)."""
    floor = 0
    return np.maximum(floor, Z)

def sigmoid_gradient(dA, Z):
    """Scale the incoming gradient dA by the sigmoid's derivative at Z.

    The derivative of the sigmoid is s * (1 - s), where s = sigmoid(Z), so
    the result is dA * s * (1 - s), evaluated element-wise.
    """
    activation = sigmoid(Z)
    complement = 1 - activation
    return dA * activation * complement

def relu_gradient(dA, Z):
    """Pass the incoming gradient dA through only where Z is positive.

    Returns a fresh copy of dA (the argument itself is not modified) with
    every entry zeroed at positions where Z <= 0.
    """
    grad = np.array(dA, copy=True)
    inactive = Z <= 0
    grad[inactive] = 0
    return grad

In [9]:
# Given the inputs from a previous layer, calculate the next layer

def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    """Compute one layer's output from the previous layer's activations.

    Args:
        A_prev: Activations coming out of the previous layer.
        W_curr: Weight matrix of the current layer; it is multiplied on the
            left of ``A_prev``.
        b_curr: Bias of the current layer, added to the matrix product.
        activation: Name of the activation to apply, ``"relu"`` or
            ``"sigmoid"``.

    Returns:
        tuple: ``(A_curr, Z_curr)`` where ``Z_curr = W_curr . A_prev + b_curr``
        is the pre-activation value and ``A_curr`` is the activation applied
        to ``Z_curr``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    # Affine step: a true matrix product (np.dot needs both operands), not an
    # element-wise multiplication.
    Z_curr = np.dot(W_curr, A_prev) + b_curr

    # Compare strings by value; `is` checks object identity and only appears
    # to work when the interpreter happens to intern both strings.
    if activation == "relu":
        active_fn = relu
    elif activation == "sigmoid":
        active_fn = sigmoid
    else:
        raise ValueError("Non-supported activation function")

    return active_fn(Z_curr), Z_curr

In [10]:
def full_forward_propagation(X, params_values, nn_architecture):
    """Run X through every layer in order and cache the intermediate values.

    Args:
        X: Network input, used as the activation of layer 0.
        params_values: Mapping holding ``"W{i}"`` and ``"b{i}"`` for each
            1-based layer index ``i``.
        nn_architecture: Sequence of layer descriptions; only the
            ``"activation"`` entry of each is read here.

    Returns:
        tuple: ``(A_last, memory)``. ``A_last`` is the output of the final
        layer (or ``X`` itself when there are no layers). ``memory`` maps
        ``"A{i-1}"`` to the input of layer ``i`` and ``"Z{i}"`` to that
        layer's pre-activation value.
    """
    cache = {}
    activation_out = X  # the raw input plays the role of layer 0's activation

    for layer_no, spec in enumerate(nn_architecture, start=1):
        # The previous layer's output becomes this layer's input.
        activation_in = activation_out

        act_name = spec["activation"]
        weights = params_values[f"W{layer_no}"]
        bias = params_values[f"b{layer_no}"]
        activation_out, pre_activation = single_layer_forward_propagation(
            activation_in, weights, bias, act_name
        )

        # Store the layer's input and its pre-activation Z. The activation
        # gradient helpers in this file take Z as an argument, which is
        # presumably why Z is cached rather than the layer's output.
        cache[f"A{layer_no - 1}"] = activation_in
        cache[f"Z{layer_no}"] = pre_activation

    return activation_out, cache

![Cost Function](./assets/cost_function.gif)

In [16]:
def get_cross_entropy_cost(Y_hat, Y, eps=1e-12):
    """Compute the mean binary cross-entropy between predictions and labels.

    Args:
        Y_hat: Predicted probabilities; ``Y_hat.shape[1]`` is taken as the
            number of examples (assumes a ``(1, m)`` row layout -- confirm
            against the caller).
        Y: Ground-truth labels with the same layout as ``Y_hat``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking
            logarithms so that a prediction of exactly 0 or 1 does not
            produce ``log(0) = -inf`` (and ``0 * -inf = nan``). Predictions
            already inside that interval are left untouched.

    Returns:
        The cost ``-(1/m) * sum(Y*log(Y_hat) + (1-Y)*log(1-Y_hat))`` with
        singleton dimensions squeezed away.
    """
    m = Y_hat.shape[1]  # Number of examples
    # https://ml-cheatsheet.readthedocs.io/en/latest/loss_functions.html#id11
    # https://www.youtube.com/watch?v=mj5DpK5gGsY
    # Work in float64 so that 1 - eps stays strictly below 1 after clipping.
    probs = np.clip(np.asarray(Y_hat, dtype=float), eps, 1 - eps)
    positive_term = np.dot(Y, np.log(probs).T)
    negative_term = np.dot(1 - Y, np.log(1 - probs).T)
    cost = (-1 / m) * (positive_term + negative_term)
    return np.squeeze(cost)