In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

In [2]:
class Activations:
    
    @staticmethod
    def relu(z):
        a = np.maximum(0,z)
        return a
    
    @staticmethod
    def sigmoid(z):
        return 1/(1+np.exp(-z))
    
    @staticmethod
    def tanh(z):
        return np.tanh(z) 
    
    @staticmethod
    def sigmoid_derivative(dA, z):
        sig = sigmoid(z)
        return dA * sig * (1 - sig)
    
    @staticmethod
    def relu_derivative(self,z):
            return 0 if z < 0 else 1
    
    @staticmethod
    def tanh_derivative(z):
       return 1-np.tanh(z)**2

In [3]:
class NeuralNetwork:
    
    def __init__(self,layer_sizes, epochs, alpha, activations):
        
        self.layer_sizes = layer_sizes
        self.num_iters = epochs
        self.learning_rate = alpha
        self.activations = activations
        
    def __repr__(self):
        return f"ANN Hyperparameters\nLayers: {self.layers_sizes}\nLearning rate: {self.learning_rate} \
        \nIterations: {self.num_iters}"
    
    def initialize_params(self):
        params = {}
        for i in range(1, len(self.layer_sizes)):
            params['W' + str(i)] = np.random.randn(self.layer_sizes[i], self.layer_sizes[i-1])*0.01
            params['B' + str(i)] = np.random.randn(self.layer_sizes[i],1)*0.01
        return params

    def forward_propagation(self, X_train, params):
        layers = len(params)//2
        values = {}
        for i in range(1, layers+1):
            
            activation_function_str = self.activations[i-1]
            activation = getattr(Activations,activation_function_str)
            
            if i==1:
                values['Z' + str(i)] = np.dot(params['W' + str(i)], X_train) + params['B' + str(i)]
                values['A' + str(i)] = activation(values['Z' + str(i)])
            else:
                values['Z' + str(i)] = np.dot(params['W' + str(i)], values['A' + str(i-1)]) + params['B' + str(i)]
                if i==layers:
                    values['A' + str(i)] = values['Z' + str(i)]
                else:
                    values['A' + str(i)] = activation(values['Z' + str(i)])
        return values

    def compute_cost(self, values, Y_train):
        layers = len(values)//2
        Y_pred = values['A' + str(layers)]
        cost = 1/(2*len(Y_train)) * np.sum(np.square(Y_pred - Y_train))
        return cost

    def backward_propagation(self, params, values, X_train, Y_train):
        layers = len(params)//2
        m = len(Y_train)
        grads = {}
        for i in range(layers,0,-1):
            if i==layers:
                dA = 1/m * (values['A' + str(i)] - Y_train)
                dZ = dA
            else:
                dA = np.dot(params['W' + str(i+1)].T, dZ)
                dZ = np.multiply(dA, np.where(values['A' + str(i)]>=0, 1, 0))
            if i==1:
                grads['W' + str(i)] = 1/m * np.dot(dZ, X_train.T)
                grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
            else:
                grads['W' + str(i)] = 1/m * np.dot(dZ,values['A' + str(i-1)].T)
                grads['B' + str(i)] = 1/m * np.sum(dZ, axis=1, keepdims=True)
        return grads

    def update_params(self, params, grads):
        layers = len(params)//2
        params_updated = {}
        for i in range(1,layers+1):
            params_updated['W' + str(i)] = params['W' + str(i)] - self.learning_rate * grads['W' + str(i)]
            params_updated['B' + str(i)] = params['B' + str(i)] - self.learning_rate * grads['B' + str(i)]
        return params_updated

    def train(self, X_train, Y_train, show_cost = False):
        params = self.initialize_params()
        for i in range(self.num_iters):
            values = self.forward_propagation(X_train.T, params)
            cost = self.compute_cost(values, Y_train.T)
            grads = self.backward_propagation(params, values,X_train.T, Y_train.T)
            params = self.update_params(params, grads)
            if (show_cost == True):
                print('Cost at iteration ' + str(i+1) + ' = ' + str(cost) + '\n')
        self.params = params
        self.X_train = X_train
        self.Y_train = Y_train
    
    def test(self, X_test, Y_test):
        values_train = self.forward_propagation(self.X_train.T, self.params)
        values_test = self.forward_propagation(X_test.T, self.params)
        train_acc = np.sqrt(mean_squared_error(self.Y_train, values_train['A' + str(len(self.layer_sizes)-1)].T))
        test_acc = np.sqrt(mean_squared_error(Y_test, values_test['A' + str(len(self.layer_sizes)-1)].T))
        self.Y_test = Y_test
        self.X_test = X_test
        self.train_acc = train_acc
        self.test_acc = test_acc
        return train_acc, test_acc

    def predict(self, X):
        values = self.forward_propagation(X.T, self.params)
        predictions = values['A' + str(len(values)//2)].T
        return predictions
    
    def change_layers(self, layers):
        old_test_acc = self.test_acc
        self.layer_sizes = layers
        self.train(self.X_train, self.Y_train)
        new_train_acc, new_test_acc = self.test(self.X_test, self.Y_test)
        self.test_acc = new_test_acc
        print(f"Old test accuracy: {old_test_acc:.4f}\nNew test accuracy: {self.test_acc:.4f}")
    
    def change_activations(self, activations):
        old_test_acc = self.test_acc
        self.activations = activations
        self.train(self.X_train, self.Y_train)
        new_train_acc, new_test_acc = self.test(self.X_test, self.Y_test)
        self.test_acc = new_test_acc
        print(f"Old test accuracy: {old_test_acc:.4f}\nNew test accuracy: {self.test_acc:.4f}")
        
    def change_epochs(self, epochs):
        old_test_acc = self.test_acc
        self.num_iters = epochs
        self.train(self.X_train, self.Y_train)
        new_train_acc, new_test_acc = self.test(self.X_test, self.Y_test)
        self.test_acc = new_test_acc
        print(f"Old test accuracy: {old_test_acc:.4f}\nNew test accuracy: {self.test_acc:.4f}")
    
    def change_learning_rate(self,alpha):
        old_test_acc = self.test_acc
        self.learning_rate = alpha
        self.train(self.X_train, self.Y_train)
        new_train_acc, new_test_acc = self.test(self.X_test, self.Y_test)
        self.test_acc = new_test_acc
        print(f"Old test accuracy: {old_test_acc:.4f}\nNew test accuracy: {self.test_acc:.4f}")
        

In [4]:
# load the dataset
data = pd.read_csv("data_banknote_authentication.txt", header=None)
# split into input and output columns
X, Y = data.values[:, :-1], data.values[:, -1] #separate data into input and output features
# ensure all data are floating point values
X = X.astype('float32')
# encode strings to integer
Y = LabelEncoder().fit_transform(Y)
# split into train and test datasets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33) #split data into train and test sets in 80-20 ratio
# determine the number of input features
n_features = X.shape[1]

In [5]:
nn = NeuralNetwork([4,5,5,1],10,0.03,['tanh','relu','relu', 'sigmoid'])

In [6]:
## To see cost in iterations, use "show_cost = True" argument
nn.train(X_train,Y_train);

In [7]:
predictions = nn.predict(X_train[4])

In [8]:
train_acc, test_acc = nn.test(X_test, Y_test)

In [9]:
print('Accuracy of Training Data = ' + str(train_acc))

Accuracy of Training Data = 0.6715688755896381


In [10]:
print('Accuracy of Test Data = ' + str(test_acc))

Accuracy of Test Data = 0.655680848463995


In [11]:
print('Predictions = ', predictions)

Predictions =  [[0.0006391 ]
 [0.00063485]
 [0.00063707]
 [0.0006348 ]
 [0.00063471]]


In [12]:
nn.change_layers([4,16,16,8,1])

Old test accuracy: 0.6557
New test accuracy: 0.6556


In [13]:
nn.change_activations(['sigmoid','tanh','sigmoid','relu'])

Old test accuracy: 0.6556
New test accuracy: 0.6661


In [14]:
nn.change_learning_rate(0.1)

Old test accuracy: 0.6661
New test accuracy: 0.6492


In [15]:
nn.change_epochs(100)

Old test accuracy: 0.6492
New test accuracy: 0.6368
