<a href="https://colab.research.google.com/github/shaarick/F21BC/blob/main/Step%201.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [104]:
import numpy as np
import pandas as pd

In [105]:
# list with parameters describing a neural network
  # input dimension = input signal vector size for the layer
  # output dimension = output activation vector of the layer
  # activation function used on the layer
nn_parameters = [
    {"input_dimension": 2, "output_dimension": 4, "activation": "tanh"}, # tanh used on first layer
    {"input_dimension": 4, "output_dimension": 6, "activation": "relu"}, # relu used on hidden layers
    {"input_dimension": 6, "output_dimension": 6, "activation": "relu"},
    {"input_dimension": 6, "output_dimension": 4, "activation": "relu"}, 
    {"input_dimension": 4, "output_dimension": 1, "activation": "sigmoid"}, # sigmoid used on final layer
]


In [106]:
# initiates the weights and biases of the nn
def init_layers(nn_parameters, seed = 90):
    np.random.seed(seed)
    number_layers = len(nn_parameters)
    param_values = {}

    for index, layer in enumerate(nn_parameters):
        layer_index = index + 1
        input_size = layer["input_dimension"]
        output_size = layer["output_dimension"]
        
        param_values['Weight' + str(layer_index)] = np.random.randn(
            output_size, input_size) * 0.1
        param_values['bias' + str(layer_index)] = np.random.randn(
            output_size, 1) * 0.1
        
    return param_values

In [107]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

def relu(x):
    return np.maximum(0,x)

def tanh(x):
    return np.tanh(x) 

def sigmoid_derivative(dA, x):
    sig = sigmoid(x)
    return dA * sig * (1 - sig)

def relu_derivative(self,x):
        return 0 if x < 0 else 1

def tanh_derivative(x):
   return 1-np.tanh(x)**2


In [108]:
sigmoid(7), relu(7), tanh(0.5), relu(-7)

(0.9990889488055994, 7, 0.46211715726000974, 0)

In [109]:
# forward propagation of a single layer
def forward_prop_single_layer(A_prev, Weight_curr, bias_curr, activation="relu"):
    x_curr = np.dot(Weight_curr, A_prev) + bias_curr
    
    if activation is "tanh":
        activ_func = tanh
    elif activation is "relu":
        activ_func = relu
    elif activation is "sigmoid":
        activ_func = sigmoid
    else:
        raise Exception('Non-supported activation function')
        
    return activation_func(x_curr), x_curr

In [110]:
def full_forward_prop(X, param_values, nn_parameters):
    memory = {}
    A_curr = X
    
    for index, layer in enumerate(nn_parameters):
        layer_index = index + 1
        A_prev = A_curr
        
        activ_function_curr = layer["activation"]
        Weight_curr = param_values["Weight" + str(layer_index)]
        bias_curr = param_values["bias" + str(layer_index)]
        A_curr, x_curr = forward_prop_single_layer(A_prev, Weight_curr, bias_curr, activ_function_curr)
        
        memory["A" + str(index)] = A_prev
        memory["x" + str(layer_index)] = x_curr
       
    return A_curr, memory

In [111]:
# cost function and accuracy value calculation
def get_cost(Y_hat, Y):
    m = Y_hat.shape[1]
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T)) # binary crossentropy used as problem is binary classification
    return np.squeeze(cost)

def get_accuracy(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()

In [112]:
def backward_prop_single_layer(dA_curr, Weight_curr, bias_curr, x_curr, A_prev, activation="relu"):
    m = A_prev.shape[1]
    
    if activation is "tanh":
        backward_activ_func = tanh_derivative
    elif activation is "relu":
        backward_activ_func = relu_derivative
    elif activation is "sigmoid":
        backward_activ_func = sigmoid_derivative
    else:
        raise Exception('Non-supported activation function')
    
    dx_curr = backward_activ_func(dA_curr, x_curr)
    dWeight_curr = np.dot(dx_curr, A_prev.T) / m
    dbias_curr = np.sum(dx_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(Weight_curr.T, dx_curr)

    return dA_prev, dWeight_curr, dbias_curr

In [113]:
def full_backward_prop(Y_hat, Y, memory, param_values, nn_parameters):
    gradient_values = {}
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
   
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));
    
    for layer_index_prev, layer in reversed(list(enumerate(nn_parameters))):
        layer_index_curr = layer_index_prev + 1
        activ_function_curr = layer["activation"]
        
        dA_curr = dA_prev
        
        A_prev = memory["A" + str(layer_index_prev)]
        x_curr = memory["x" + str(layer_index_curr)]
        Weight_curr = param_values["Weight" + str(layer_index_curr)]
        bias_curr = param_values["bias" + str(layer_index_curr)]
        
        dA_prev, dWeight_curr, dbias_curr = backward_prop_single_layer(
            dA_curr, Weight_curr, bias_curr, x_curr, A_prev, activ_function_curr)
        
        gradient_values["dWeight" + str(layer_index_curr)] = dWeight_curr
        gradient_values["dbias" + str(layer_index_curr)] = dbias_curr
    
    return gradient_values

In [114]:
# using gradient descent to update parameter values
def update(param_values, gradient_values, nn_parameters, learning_rate):
    for layer_index, layer in enumerate(nn_parameters):
        param_values["Weight" + str(layer_index)] -= learning_rate * gradient_values["dWeight" + str(layer_index)]        
        param_values["bias" + str(layer_index)] -= learning_rate * gradient_values["dbias" + str(layer_index)]

    return param_values;

In [115]:
def train(X, Y, nn_parameters, epochs, learning_rate):
    param_values = init_layers(nn_parameters, 2)
    cost_history = []
    accuracy_history = []
    
    for i in range(epochs):
        Y_hat, cashe = full_forward_prop(X, param_values, nn_parameters)
        cost = get_cost(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        gradient_values = full_backward_prop(Y_hat, Y, cashe, param_values, nn_parameters)
        param_values = update(param_values, gradient_values, nn_parameters, learning_rate)
        
    return param_values, cost_history, accuracy_history

In [116]:
df = pd.read_csv("data_banknote_authentication.txt")


In [117]:
df.head()


Unnamed: 0,3.6216,8.6661,-2.8073,-0.44699,0
0,4.5459,8.1674,-2.4586,-1.4621,0
1,3.866,-2.6383,1.9242,0.10645,0
2,3.4566,9.5228,-4.0112,-3.5944,0
3,0.32924,-4.4552,4.5718,-0.9888,0
4,4.3684,9.6718,-3.9606,-3.1625,0
