In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
data = pd.read_csv("./data_banknote_authentication.txt", header=None)
# seperate into input and output features
X, Y = data.values[:, :-1], data.values[:, -1]
Y = LabelEncoder().fit_transform(Y)
# split into train and test datasets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

X_train = X_train.T
X_test = X_test.T

In [3]:
def relu(Z):
    return np.maximum(Z, 0)

def tanh(Z):
    return np.tanh(Z)

def sigmoid(Z):
    return 1/(1+np.exp(-Z))

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

def relu_deriv(Z):
    return Z > 0

def tanh_deriv(Z):
    return 1-np.tanh(Z)**2

def sigmoid_deriv(Z):
    sig = 1/(1+np.exp(-Z))
    return sig * (1 - sig)

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [4]:
class NeuralNetwork:
    def __init__(self, layer_sizes, epochs, alpha, activations):
            self.layer_sizes = layer_sizes
            self.num_iters = epochs
            self.learning_rate = alpha
            self.activations = activations
            self.params = {}
            self.last = len(self.layer_sizes)
            self.init_params();
            
    def __repr__(self):
        return f"ANN Hyperparameters\nLayers: {self.layers_sizes}\nLearning rate: {self.learning_rate} \
        \nIterations: {self.num_iters}"
    
    # Initalize the network with random weights and biases
    def init_params(self):
        params = self.params
        for i in range(1, self.last):
            params[f'W{i}'] = np.random.rand(self.layer_sizes[i], self.layer_sizes[i-1]) - 0.5
            params[f'B{i}'] = np.random.rand(self.layer_sizes[i],1) - 0.5
            
    def forward_propagation(self, X):
        # Pass X through the network 
        params = self.params
        for i in range(1, self.last):
            if i==1:
                # For first layer compute dot with x
                output = params[f'W{i}'].dot(X) + params[f'B{i}']
            else: 
                # For all other layers compute with output of prev layer
                output = params[f'W{i}'].dot(params[f'A{i-1}']) + params[f'B{i}']
            if(i == self.last - 1):
                # For final layer use softmax
                activations = softmax(output)
            else:
                # For all other layers use the activ_fnc initalized in constructor
                activ_fnc = self.activations[i-1]
                activations = globals()[activ_fnc](output)
            
            params[f'Z{i}'] = output
            params[f'A{i}'] = activations


    def backward_propagation(self, X, Y):
        ohY = one_hot(Y)
        params = self.params
        
        for i in range(self.last - 1, 0, -1):
            if(i == self.last - 1):
                # For final layer calculate how wrong net is and compute gradients for weights and biases
                dZ =  params[f'A{i}'] - ohY
                params[f'dZ{i}'] = dZ
                params[f'dW{i}'] = 1 / m * dZ.dot(params[f'A{i-1}'].T)
                params[f'dB{i}'] = 1 / m * np.sum(dZ)
            elif (i == 1):
                # For first layer compute the gradients for weights and biases with input of dataset
                dZ = params[f'W{i+1}'].T.dot(params[f'dZ{i+1}']) * globals()[f'{self.activations[i-1]}_deriv'](params[f'Z{i}'])
                params[f'dZ{i}'] = dZ
                params[f'dW{i}'] = 1 / m * dZ.dot(X.T)
                params[f'dB{i}'] = 1 / m * np.sum(dZ)
            else: 
                # For all other layers compute the gradients for weights and biases with the output of the prev layer
                dZ = params[f'W{i+1}'].T.dot(params[f'dZ{i+1}']) * globals()[f'{self.activations[i-1]}_deriv'](params[f'Z{i}'])
                params[f'dZ{i}'] = dZ
                params[f'dW{i}'] = 1 / m * dZ.dot(params[f'A{i-1}'].T)
                params[f'dB{i}'] = 1 / m * np.sum(dZ)

    
    def update_params(self, alpha):
        # Update all layers based on the gradients calculated in back prop
        params = self.params
        for i in range(1, self.last):
            params[f'W{i}'] =  params[f'W{i}'] - self.learning_rate * params[f'dW{i}']
            params[f'B{i}'] =  params[f'B{i}'] - self.learning_rate * params[f'dB{i}']

    def make_predictions(self, X):
        self.forward_propagation(X)
        predictions = self.get_predictions()
        return predictions

    def test_predictions(self, i):
        curr = X_train[:, i, None]
        print("label ", Y_train[i])
        print("predicted ", self.make_predictions(curr)[0])

    def get_predictions(self):
        return np.argmax(self.params[f'A{self.last-1}'], 0)

    def get_accuracy(self, predictions, Y):
        return np.sum(predictions == Y) / Y.size
    
    def train(self, X, Y):
        for i in range(self.num_iters):
            self.forward_propagation(X)
            self.backward_propagation(X, Y)
            self.update_params(self.learning_rate)
       
            # Print accuracy every 10 iterations
            if i % 1 == 0:
                print("Iteration: ", i)
                predictions = self.get_predictions()
                print('Accuracy: ',self.get_accuracy(predictions, Y))
                
        return self.params

In [5]:
nn = NeuralNetwork([4,10,10,2], 500, 0.10, ['relu','relu'])

In [6]:
x = nn.train(X_train, Y_train)

Iteration:  0
Accuracy:  0.5669205658324266
Iteration:  1
Accuracy:  0.7181719260065288
Iteration:  2
Accuracy:  0.7562568008705114
Iteration:  3
Accuracy:  0.7671381936887922
Iteration:  4
Accuracy:  0.7812840043525572
Iteration:  5
Accuracy:  0.8204570184983678
Iteration:  6
Accuracy:  0.8683351468988031
Iteration:  7
Accuracy:  0.8813928182807399
Iteration:  8
Accuracy:  0.8933623503808488
Iteration:  9
Accuracy:  0.9031556039173014
Iteration:  10
Accuracy:  0.9129488574537541
Iteration:  11
Accuracy:  0.926006528835691
Iteration:  12
Accuracy:  0.9445048966267682
Iteration:  13
Accuracy:  0.9521218715995647
Iteration:  14
Accuracy:  0.9597388465723613
Iteration:  15
Accuracy:  0.9640914036996736
Iteration:  16
Accuracy:  0.9684439608269858
Iteration:  17
Accuracy:  0.970620239390642
Iteration:  18
Accuracy:  0.9749727965179543
Iteration:  19
Accuracy:  0.9782372143634385
Iteration:  20
Accuracy:  0.9793253536452666
Iteration:  21
Accuracy:  0.9793253536452666
Iteration:  22
Accurac

In [7]:
preds = nn.make_predictions(X_test)

In [8]:
nn.test_predictions(1)

label  1
predicted  1


In [9]:
n

5