In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

In [2]:
class Activations:
    
    @staticmethod
    def relu(z):
        a = np.maximum(0,z)
        return a
    
    @staticmethod
    def sigmoid(z):
        return 1/(1+np.exp(-z))
    
    @staticmethod
    def tanh(z):
        return np.tanh(z) 
    
    @staticmethod
    def sigmoid_derivative(dA, z):
        sig = sigmoid(z)
        return dA * sig * (1 - sig)
    
    @staticmethod
    def relu_derivative(self,z):
            return 0 if z < 0 else 1
    
    @staticmethod
    def tanh_derivative(z):
       return 1-np.tanh(z)**2

In [3]:
class NeuralNetwork:
    
    def __init__(self,layer_sizes, epochs, alpha):
        
        self.layer_sizes = layer_sizes
        self.num_iters = epochs
        self.learning_rate = alpha
    
    def __repr__(self):
        return f"ANN Hyperparameters\nLayers: {self.layers_sizes}\nLearning rate: {self.learning_rate} \
        \nIterations: {self.num_iters}"
    
    def initialize_params(self):
        params = {}
        for i in range(1, len(self.layer_sizes)):
            params['W' + str(i)] = np.random.randn(self.layer_sizes[i], self.layer_sizes[i-1])*0.01
            params['B' + str(i)] = np.random.randn(self.layer_sizes[i],1)*0.01
        return params

    def forward_propagation(self, X_train, params):
        layers = len(params)//2
        values = {}
        for i in range(1, layers+1):
            if i==1:
                values['Z' + str(i)] = np.dot(params['W' + str(i)], X_train) + params['B' + str(i)]
                values['A' + str(i)] = Activations.tanh(values['Z' + str(i)])
            else:
                values['Z' + str(i)] = np.dot(params['W' + str(i)], values['A' + str(i-1)]) + params['B' + str(i)]
                if i==layers:
                    values['A' + str(i)] = values['Z' + str(i)]
                else:
                    values['A' + str(i)] = Activations.relu(values['Z' + str(i)])
        return values

    def compute_cost(self, values, Y_train):
        layers = len(values)//2
        Y_pred = values['A' + str(layers)]
        cost = 1/(2*len(Y_train)) * np.sum(np.square(Y_pred - Y_train))
        return cost

    def backward_propagation(self, params, values, X_train, Y_train):
        layers = len(params)//2
        m = len(Y_train)
        grads = {}
        for i in range(layers,0,-1):
            if i==layers:
                dA = 1/m * (values['A' + str(i)] - Y_train)
                dZ = dA
            else:
                dA = np.dot(params['W' + str(i+1)].T, dZ)
                dZ = np.multiply(dA, np.where(values['A' + str(i)]>=0, 1, 0))
            if i==1:
                grads['W' + str(i)] = 1/m * np.dot(dZ, X_train.T)
                grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
            else:
                grads['W' + str(i)] = 1/m * np.dot(dZ,values['A' + str(i-1)].T)
                grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
        return grads

    def update_params(self, params, grads):
        layers = len(params)//2
        params_updated = {}
        for i in range(1,layers+1):
            params_updated['W' + str(i)] = params['W' + str(i)] - self.learning_rate * grads['W' + str(i)]
            params_updated['B' + str(i)] = params['B' + str(i)] - self.learning_rate * grads['B' + str(i)]
        return params_updated

    def train(self, X_train, Y_train):
        params = self.initialize_params()
        for i in range(self.num_iters):
            values = self.forward_propagation(X_train.T, params)
            cost = self.compute_cost(values, Y_train.T)
            grads = self.backward_propagation(params, values,X_train.T, Y_train.T)
            params = self.update_params(params, grads)
            print('Cost at iteration ' + str(i+1) + ' = ' + str(cost) + '\n')
        return params
    
    def compute_accuracy(self, X_train, X_test, Y_train, Y_test, params):
        values_train = self.forward_propagation(X_train.T, params)
        values_test = self.forward_propagation(X_test.T, params)
        train_acc = np.sqrt(mean_squared_error(Y_train, values_train['A' + str(len(self.layer_sizes)-1)].T))
        test_acc = np.sqrt(mean_squared_error(Y_test, values_test['A' + str(len(self.layer_sizes)-1)].T))
        return train_acc, test_acc

    def predict(self, X, params):
        values = self.forward_propagation(X.T, params)
        predictions = values['A' + str(len(values)//2)].T
        return predictions

In [4]:
# load the dataset
data = pd.read_csv("data_banknote_authentication.txt", header=None)
# split into input and output columns
X, Y = data.values[:, :-1], data.values[:, -1] #separate data into input and output features
# ensure all data are floating point values
X = X.astype('float32')
# encode strings to integer
Y = LabelEncoder().fit_transform(Y)
# split into train and test datasets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33) #split data into train and test sets in 80-20 ratio
# determine the number of input features
n_features = X.shape[1]

In [5]:
nn = NeuralNetwork([4,5,5,1],10,0.03)

In [6]:
params = nn.train(X_train,Y_train);

Cost at iteration 1 = 0.22469139202385582

Cost at iteration 2 = 0.22468480189905315

Cost at iteration 3 = 0.22467821220456924

Cost at iteration 4 = 0.224671622940376

Cost at iteration 5 = 0.2246650341064453

Cost at iteration 6 = 0.22465844570274907

Cost at iteration 7 = 0.22465185772925925

Cost at iteration 8 = 0.22464527018594768

Cost at iteration 9 = 0.22463868307278637

Cost at iteration 10 = 0.2246320963897471



In [7]:
predictions = nn.predict(X_train[4], params)

In [8]:
train_acc, test_acc = nn.compute_accuracy(X_train, X_test, Y_train, Y_test, params)

In [9]:
print('Accuracy of Training Data = ' + str(train_acc))

Accuracy of Training Data = 0.670261904238637


In [10]:
print('Accuracy of Test Data = ' + str(test_acc))

Accuracy of Test Data = 0.6568719978238466


In [11]:
print('Predictions = ', predictions)

Predictions =  [[0.00137821]
 [0.00137841]
 [0.00137421]
 [0.00137949]
 [0.00137405]]
