In [1]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import math

In [2]:
# Read both header-less CSV splits and convert each one to a plain NumPy array.
train_data, test_data = (
    pd.read_csv(csv_path, header=None).to_numpy()
    for csv_path in ("./Alphabets/train.csv", "./Alphabets/test.csv")
)

In [3]:
# Every column except the last is a feature; the last column is the label.
# Features are cast to float64 and divided by 255
# (presumably 8-bit pixel intensities, giving values in [0, 1] -- TODO confirm).
train_x, train_y = train_data[:, :-1].astype("float64") / 255, train_data[:, -1]
test_x, test_y = test_data[:, :-1].astype("float64") / 255, test_data[:, -1]

# PART A

In [4]:
class neuralNetwork:
    """Fully connected feed-forward network trained with mini-batch gradient descent.

    Hidden layers use the activation named by ``activationMode`` ("sigmoid" or
    "relu"); the output layer always uses sigmoid units.  Training minimises the
    squared error between the network output and one-hot encoded labels.
    """

    def __init__(self, batchSize, input_features, architecture, target_class, eta, epoch, activationMode, learningRate):
        """Store the hyper-parameters; weights are created later by ``fit``.

        Parameters
        ----------
        batchSize : int
            Number of samples per mini-batch.
        input_features : int
            Number of input features per sample.
        architecture : sequence of int
            Number of units in each hidden layer, in order.
        target_class : int
            Number of output classes (size of the output layer).
        eta : float
            Base learning rate.
        epoch : int
            Number of passes over the training data.
        activationMode : str
            Hidden-layer activation, "sigmoid" or "relu".
        learningRate : str
            "adaptive" scales ``eta`` by 1/sqrt(epoch counter); any other value
            (e.g. "normal") keeps ``eta`` constant.
        """
        self.batchSize = batchSize
        self.input_features = input_features
        # Stored as a list so a tuple works too (it is concatenated with a list later).
        self.architecture = list(architecture)
        self.target_class = target_class
        self.learningRate = learningRate
        self.activationMode = activationMode

        # Hidden layers plus the output layer.
        self.total_layers = len(self.architecture) + 1

        # parameter[k] == [weight, bias] of layer k; filled by initalize_parameters().
        self.parameter = []
        # Per-layer caches written by the forward/backward passes.
        self.layer_input = [0] * self.total_layers
        self.layer_output = [0] * self.total_layers
        self.layer_delta = [0] * self.total_layers

        self.eta = eta
        self.epoch = epoch

    def activation(self, x, mode):
        """Apply the activation named ``mode`` ("sigmoid" or "relu") element-wise.

        Raises
        ------
        ValueError
            If ``mode`` is not a supported activation name.
        """
        if mode == "sigmoid":
            return 1 / (1 + np.exp(-x))
        if mode == "relu":
            return np.maximum(0, x)
        raise ValueError("unsupported activation mode: %r" % (mode,))

    def differentiation(self, op, mode):
        """Return the activation derivative expressed through the activation output ``op``.

        Raises
        ------
        ValueError
            If ``mode`` is not a supported activation name.
        """
        if mode == "sigmoid":
            return op * (1 - op)
        if mode == "relu":
            # The relu output is positive exactly where its input was positive.
            return 1 * (op > 0)
        raise ValueError("unsupported activation mode: %r" % (mode,))

    def oneHotEncoding(self, y):  # y is a list/array of integer labels, 0 <= y < target_class
        """Convert integer labels to a (len(y), target_class) one-hot matrix."""
        labels = np.array(y)
        encoded = np.zeros((labels.size, self.target_class))
        encoded[np.arange(labels.size), labels] = 1
        return encoded

    def initalize_parameters(self):
        """(Re)create all weights and biases, uniformly drawn from [-0.3, 0.3).

        The global NumPy seed is reset to 0 so initialisation is reproducible.
        """
        layer_sizes = self.architecture + [self.target_class]
        np.random.seed(0)

        # Start from an empty list: otherwise a second fit() would append a new
        # set of layers behind the already trained ones and keep using the old ones.
        self.parameter = []

        fan_in = self.input_features
        for fan_out in layer_sizes:
            weight = np.random.uniform(low=-0.3, high=0.3, size=(fan_out, fan_in))
            bias = np.random.uniform(low=-0.3, high=0.3, size=(fan_out, 1))
            self.parameter.append([weight, bias])
            # The next layer consumes this layer's output.
            fan_in = fan_out

    def full_feedForward(self, X):
        """Run a forward pass over ``X`` (samples in rows) and cache per-layer values.

        Stores the input and the activated output of every layer in
        ``layer_input`` / ``layer_output`` and returns the output of the last
        layer, i.e. the network prediction.
        """
        last_layer = self.total_layers - 1
        current_input = X

        for layer in range(self.total_layers):
            weight, bias = self.parameter[layer]
            netJ = np.dot(current_input, weight.T) + bias.T

            # The output layer is always sigmoid; hidden layers follow activationMode.
            mode = "sigmoid" if layer == last_layer else self.activationMode
            G_netJ = self.activation(netJ, mode)

            self.layer_input[layer] = current_input
            self.layer_output[layer] = G_netJ

            # Arrays are never modified in place, so no defensive copies are needed.
            current_input = G_netJ

        return current_input

    def full_backpropagation(self, Y):
        """Compute and cache the delta of every layer for one-hot targets ``Y``.

        Must be called after ``full_feedForward`` on the matching batch.  The
        deltas carry the sign of (target - output), so adding the resulting
        gradients to the parameters reduces the cost.
        """
        last_layer = self.total_layers - 1
        output = self.layer_output[last_layer]

        # The output layer is sigmoid in every configuration.
        delta = (Y - output) * self.differentiation(output, "sigmoid") / Y.shape[0]
        self.layer_delta[last_layer] = delta

        # Walk backwards, pushing the delta through each layer's weights.
        for layer in range(last_layer, 0, -1):
            downstream_weight = self.parameter[layer][0]  # weight without bias
            hidden_output = self.layer_output[layer - 1]
            hidden_slope = self.differentiation(hidden_output, self.activationMode)

            delta = np.dot(delta, downstream_weight) * hidden_slope
            self.layer_delta[layer - 1] = delta

    def costFunction(self, y):
        """Return the halved mean (over samples) squared error of the cached output against ``y``."""
        final_op = self.layer_output[self.total_layers - 1]
        return (np.sum((y - final_op) ** 2)) / (2 * y.shape[0])

    def updateParameters(self, epochCount):
        """Apply one gradient step using the cached inputs and deltas.

        With ``learningRate == "adaptive"`` the step size is
        ``eta / sqrt(epochCount)``; ``epochCount`` is clamped to at least 1 so
        the first epoch cannot divide by zero.  Otherwise the step size is ``eta``.
        """
        if self.learningRate == "adaptive":
            ETA = self.eta / math.sqrt(max(epochCount, 1))
        else:
            ETA = self.eta

        for i in range(self.total_layers):
            gradient_W = np.dot(self.layer_delta[i].T, self.layer_input[i])
            gradient_B = np.sum(self.layer_delta[i], axis=0).reshape((-1, 1))
            self.parameter[i][0] = self.parameter[i][0] + ETA * gradient_W
            self.parameter[i][1] = self.parameter[i][1] + ETA * gradient_B

    def fit(self, x, y):
        """Train the network on features ``x`` and integer labels ``y``.

        Parameters are re-initialised on every call.  The data is processed in
        order, in consecutive mini-batches of ``batchSize`` rows, for ``epoch``
        passes.
        """
        self.initalize_parameters()
        Y = self.oneHotEncoding(y)

        epochNumber = 1
        for epoch_index in range(self.epoch):
            # The counter used by the adaptive schedule is refreshed every 10
            # epochs; it never drops below 1 (epoch_index 0 would divide by zero).
            if epoch_index % 10 == 0:
                epochNumber = max(1, epoch_index)

            for start in range(0, x.shape[0], self.batchSize):
                x_batch = x[start:start + self.batchSize]
                y_batch = Y[start:start + self.batchSize]

                self.full_feedForward(x_batch)
                self.full_backpropagation(y_batch)
                self.updateParameters(epochNumber)

    def score(self, x, y):
        """Return ``(accuracy, predicted_labels)`` for features ``x`` and labels ``y``."""
        final_op = self.full_feedForward(x)
        predicted = np.argmax(final_op, axis=1)
        return np.count_nonzero(predicted == y) / y.shape[0], predicted


In [5]:
# One hidden layer of 100 sigmoid units on 784 inputs and 26 output classes,
# trained for 1000 epochs in mini-batches of 100 with a constant step size of 0.1.
model = neuralNetwork(
    batchSize=100,
    input_features=784,
    architecture=[100],
    target_class=26,
    eta=0.1,
    epoch=1000,
    activationMode="sigmoid",
    learningRate="normal",
)

In [6]:
# Measure training time with a monotonic clock: time.time() is wall-clock time
# and can jump if the system clock is adjusted during a long run.
s = time.perf_counter()
model.fit(train_x, train_y)
print(time.perf_counter() - s)

398.6145706176758


In [7]:
# (accuracy, predicted labels) on the training split
model.score(x=train_x, y=train_y)

(0.9473846153846154, array([24,  5, 24, ...,  7, 15, 25]))

In [8]:
# (accuracy, predicted labels) on the held-out test split
model.score(x=test_x, y=test_y)

(0.8943076923076924, array([25, 13,  6, ...,  3,  4, 12]))