In [2]:
import numpy as np
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import csv

In [3]:
# Network layout, one entry per layer in forward order. Each entry records the
# width of the layer's input, the width of its output, and the name of the
# activation applied to that layer.
nn_architecture = [
    dict(input_dim=n_in, output_dim=n_out, activation=act_name)
    for n_in, n_out, act_name in (
        (2, 4, "relu"),
        (4, 6, "relu"),
        (6, 6, "relu"),
        (6, 4, "relu"),
        (4, 1, "sigmoid"),
    )
]

In [4]:
def init_layers(nn_architecture: list, seed=99):
    """Create the initial weights and biases for every layer of the network.

    Args:
        nn_architecture: layer descriptions in forward order; each one is a
            mapping with integer "input_dim" and "output_dim" entries.
        seed: value handed to ``np.random.seed`` before drawing, so two calls
            with the same seed return identical parameters.

    Returns:
        dict mapping "W<k>" to an (output_dim, input_dim) array and "b<k>" to
        an (output_dim, 1) array, with layers numbered from 1.
    """
    # TODO: use Xavier
    # https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78
    np.random.seed(seed)
    params_values = {}

    for idx_layer, layer in enumerate(nn_architecture, start=1):
        input_size = layer["input_dim"]
        output_size = layer["output_dim"]

        # Standard-normal draws scaled by 0.1 keep the starting values small.
        params_values["W" + str(idx_layer)] = np.random.randn(output_size, input_size) * 0.1
        params_values["b" + str(idx_layer)] = np.random.randn(output_size, 1) * 0.1

    return params_values

In [10]:
def sigmoid(Z):
    """Apply the logistic function 1 / (1 + exp(-Z)) element-wise.

    Args:
        Z: scalar or array-like of pre-activation values.

    Returns:
        Values in the range [0, 1] with the same shape as ``Z``; an input of 0
        maps to 0.5 and larger inputs map closer to 1.
    """
    # The exponent has to be negated: with exp(+Z) the curve would be mirrored
    # and the function would return 1 - sigmoid(Z).
    return 1 / (1 + np.exp(-np.asarray(Z)))

def relu(Z):
    """Rectified linear unit: the element-wise maximum of 0 and ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified

def sigmoid_backward(dA, Z):
    """Push a gradient back through the sigmoid activation.

    Args:
        dA: gradient with respect to the activation output.
        Z: pre-activation values that were fed to ``sigmoid``.

    Returns:
        ``dA * s * (1 - s)`` where ``s = sigmoid(Z)``.
    """
    activated = sigmoid(Z)
    scaled = dA * activated
    return scaled * (1 - activated)

def relu_backward(dA, Z):
    """Push a gradient back through the ReLU activation.

    Args:
        dA: gradient with respect to the activation output; it is copied, not
            modified.
        Z: pre-activation values that were fed to ``relu``.

    Returns:
        A copy of ``dA`` with every position where ``Z`` is negative set to 0.
        Positions where ``Z`` is exactly 0 keep their incoming gradient.
    """
    blocked = Z < 0
    grad = np.array(dA)
    grad[blocked] = 0
    return grad

In [11]:
def single_layer_forward_propagation(A_prev: np.ndarray, W_curr: np.ndarray, b_curr: np.ndarray, activation='relu'):
    """Run one dense layer forward: affine transform followed by an activation.

    Args:
        A_prev: activations coming from the previous layer.
        W_curr: weight matrix of this layer.
        b_curr: bias of this layer, added to ``np.dot(W_curr, A_prev)``.
        activation: "relu" or "sigmoid".

    Returns:
        Tuple ``(A_curr, Z_curr)``: the activated output and the
        pre-activation values it was computed from.

    Raises:
        Exception: if ``activation`` is neither "relu" nor "sigmoid".
    """
    pre_activation = np.dot(W_curr, A_prev) + b_curr

    if activation not in ("relu", "sigmoid"):
        raise Exception("Non-supported activation function.")

    squash = relu if activation == "relu" else sigmoid
    return squash(pre_activation), pre_activation

In [13]:
def forward_propagation(X, params_values, nn_architecture):
    """Feed ``X`` through every layer and record the intermediate values.

    Args:
        X: input to the first layer.
        params_values: dict holding "W<k>" and "b<k>" for each layer k,
            numbered from 1.
        nn_architecture: layer descriptions in forward order; each one
            provides an "activation" entry.

    Returns:
        Tuple ``(output, caches)``. ``output`` is the activation of the last
        layer (``X`` itself when there are no layers). ``caches`` stores, for
        layer k, its input under "A<k-1>" and its pre-activation under "Z<k>".
    """
    caches = {}
    signal = X

    for layer_no, layer in enumerate(nn_architecture, start=1):
        layer_input = signal
        activation_name = layer["activation"]
        weights = params_values['W' + str(layer_no)]
        bias = params_values['b' + str(layer_no)]

        signal, pre_activation = single_layer_forward_propagation(
            layer_input, weights, bias, activation_name)

        # The input is stored under the previous layer's number, the
        # pre-activation under this layer's number.
        caches['A' + str(layer_no - 1)] = layer_input
        caches["Z" + str(layer_no)] = pre_activation

    return signal, caches

In [14]:
def get_cost_value(Y_pred, Y, eps=1e-15):
    """Average binary cross-entropy between predictions and labels.

    Args:
        Y_pred: predicted probabilities, one column per example.
        Y: labels with the same layout as ``Y_pred``.
        eps: predictions are clipped to ``[eps, 1 - eps]`` before taking
            logarithms.

    Returns:
        The cost with all length-1 axes squeezed away.
    """
    Y_pred = np.asarray(Y_pred, dtype=float)
    m = Y_pred.shape[1]

    # A prediction of exactly 0 or 1 would give log(0) = -inf, and
    # 0 * -inf = nan, which poisons the whole cost. Clipping keeps it finite.
    Y_pred = np.clip(Y_pred, eps, 1 - eps)

    cost = -1 / m * (np.dot(Y, np.log(Y_pred).T) + np.dot(1 - Y, np.log(1 - Y_pred).T))

    return np.squeeze(cost)

In [15]:
def get_accuracy_value(Y_pred, Y):
    """Fraction of examples whose thresholded prediction matches the label.

    Args:
        Y_pred: predicted probabilities, one column per example.
        Y: labels with the same layout as ``Y_pred``.

    Returns:
        Mean over columns of "every row in this column matches", where a
        probability counts as class 1 when it is strictly greater than 0.5.
    """
    # Threshold the argument itself rather than any outer-scope variable.
    predicted_classes = Y_pred > 0.5
    return (predicted_classes == Y).all(axis=0).mean()

In [16]:
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    """Run one dense layer backward and compute its gradients.

    Args:
        dA_curr: gradient with respect to this layer's activation output.
        W_curr: weight matrix of this layer.
        b_curr: bias of this layer; not read by this function.
        Z_curr: pre-activation values recorded during the forward pass.
        A_prev: input this layer received during the forward pass; its second
            axis length is used as the averaging count.
        activation: "relu" or "sigmoid".

    Returns:
        Tuple ``(dA_prev, dW_curr, db_curr)``: the gradient to hand to the
        previous layer, and the averaged gradients for the weights and bias.

    Raises:
        Exception: if ``activation`` is neither "relu" nor "sigmoid".
    """
    m = A_prev.shape[1]

    # Compare by value. An identity check on strings only succeeds when both
    # sides happen to be the same interned object, so a name built at run time
    # (read from a file, concatenated, ...) would be rejected.
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise Exception('Non-supported activation function.')

    dZ_curr = backward_activation_func(dA_curr, Z_curr)

    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr

In [18]:
def backward_propagation(Y_pred, Y, caches, params_values, nn_architecture):
    """Walk the network from last layer to first and collect all gradients.

    Args:
        Y_pred: output of the forward pass.
        Y: labels; reshaped to the shape of ``Y_pred`` before use.
        caches: values recorded by the forward pass, holding the input of
            layer k under "A<k-1>" and its pre-activation under "Z<k>".
        params_values: dict holding "W<k>" and "b<k>" for each layer k.
        nn_architecture: layer descriptions in forward order; each one
            provides an "activation" entry.

    Returns:
        dict mapping "dW<k>" and "db<k>" to the gradients of layer k. Empty
        when ``nn_architecture`` has no layers.
    """
    grads_values = {}
    Y = Y.reshape(Y_pred.shape)

    # Derivative of binary cross-entropy with respect to the prediction.
    # Entries of Y_pred that are exactly 0 or 1 divide by zero here.
    dA_prev = -(np.divide(Y, Y_pred) - np.divide(1 - Y, 1 - Y_pred))

    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]

        # The gradient produced for the layer below on the previous iteration
        # is the incoming gradient of the layer handled now.
        dA_curr = dA_prev

        A_prev = caches["A" + str(layer_idx_prev)]
        Z_curr = caches["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)

        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr

    return grads_values

In [19]:
def update(params_values, grads_values, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    Args:
        params_values: dict holding "W<k>" and "b<k>" for each layer k,
            numbered from 1.
        grads_values: dict holding the matching "dW<k>" and "db<k>".
        nn_architecture: layer descriptions; only their count is used.
        learning_rate: factor applied to each gradient before subtracting.

    Returns:
        The same ``params_values`` dict, whose entries were updated with
        ``-=``.
    """
    # Parameter keys are numbered from 1, so the counter must start at 1 too;
    # starting at 0 would look up the non-existent "W0" / "dW0".
    for layer_idx, _ in enumerate(nn_architecture, start=1):
        suffix = str(layer_idx)
        params_values["W" + suffix] -= learning_rate * grads_values["dW" + suffix]
        params_values["b" + suffix] -= learning_rate * grads_values["db" + suffix]

    return params_values

In [21]:
def train(X, Y, nn_architecture, epochs, learning_rate, seed=2):
    """Train the network with full-batch gradient descent.

    Every epoch runs a forward pass over ``X``, records cost and accuracy
    against ``Y``, runs the backward pass and applies one parameter update.

    Args:
        X: input passed to the forward pass.
        Y: labels used for cost, accuracy and gradients.
        nn_architecture: layer descriptions in forward order.
        epochs: number of forward/backward/update rounds to run.
        learning_rate: step size handed to ``update``.
        seed: seed handed to ``init_layers`` for the initial parameters. The
            default of 2 is the value that was previously hard-coded.

    Returns:
        Tuple ``(params_values, cost_history, accuracy_history)``; the two
        histories hold one entry per epoch, measured before that epoch's
        update.
    """
    params_values = init_layers(nn_architecture, seed)
    cost_history = []
    accuracy_history = []

    for _ in range(epochs):
        Y_hat, cache = forward_propagation(X, params_values, nn_architecture)

        cost_history.append(get_cost_value(Y_hat, Y))
        accuracy_history.append(get_accuracy_value(Y_hat, Y))

        grads_values = backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

    return params_values, cost_history, accuracy_history