In [50]:
import numpy as np

In [51]:
class ANN:
    #List of List for each layer along with their information
    #Shape: [ [layerType,activationF(),neuronCount],[layerType,activationF(),neuronCount]]
    layer_info = None
    #List of weight matrices of each layer
    #Shape: dxk (where d is no of input)(k is no of output)
    w_l_ij= None
    #List of bias vector of each layer
    #Shape: 1xk (where k is no of output/neuron)
    b_l_j = None
    #List of output of each neuron
    #Shape: 1xd (where d is dimension)
    a_l_ij = None
    #List of d(a_l+1)/da_l (where a_l+1 is output of layer)(a_l is input of layer)
    #Shape: 1xd (same shape as input of layer)
    delta_l_ij = None
    #List of matrices of dl/dw for each layer
    dl_db = None
    dl_dw = None
    #Dictionary for differentiation function for a input function. diff[f()] => df()
    diff = {}
    input_featureCount = None
    input_count = None
    def __init__(self,xShape,rseed = 10,listLayer= None):
        np.random.seed(rseed)
        inputCount,inputDim = xShape
        self.input_featureCount = inputDim
        self.input_count = inputCount
        self.layer_info = [['None','None',self.input_featureCount]]
        self.w_l_ij = ['None']
        self.b_l_j = ['None']
        self.optimizer = None
        self.diff[ANN.activation_linear] = ANN.diff_activation_linear
        self.diff[ANN.activation_sigmoid] = ANN.diff_activation_sigmoid
        self.diff[ANN.activation_tanh] = ANN.diff_activation_tanh

    def compile(self,optimizer,lossFunction):
        self.optimizer = optimizer
        self.layer_info.append(['Loss',lossFunction])

    def __str__(self):
        return

    def addLayers_Dense(self,neuronCount,activationFunction):
            self.layer_info.append(['Dense',activationFunction,neuronCount])
            w = np.random.normal(0,1,size =(self.layer_info[-2][2],neuronCount))
            b = np.random.normal(0,1,size=(neuronCount))
            self.w_l_ij.append(w)
            self.b_l_j.append(b)

    def addLayers_softmax(self):
        self.layer_info.append(['Softmax',self.layer_info[-1][2]])
        self.w_l_ij.append('None')
        self.b_l_j.append('None')

    def forward_layer_dense(self,layerNo):
        if layerNo < 1:
            print("ERRRROR")
        #Output = activationFunction(a_l-1 @ w_l + b_l)
        output = self.layer_info[layerNo][1](self.a_l_ij[layerNo-1] @ self.w_l_ij[layerNo] + self.b_l_j[layerNo]) 
        self.a_l_ij.append(output)
    
    def backward_layer_dense(self,layerNo):
        #at each layer we need to calculate da_l/da_l-1, and da_l/dw_l
        #Shape: dxk
        da_1da = self.diff[self.layer_info[layerNo][1]](self.a_l_ij[layerNo]).T @ self.w_l_ij[layerNo].T
        #Shape: dxk
        da_l1_dw_l = np.zeros(self.w_l_ij[layerNo].shape)
        for i in range(da_l1_dw_l.shape[0]):
            for j in range(da_l1_dw_l.shape[1]):
                da_l1_dw_l[i][j] = self.diff[self.layer_info[layerNo][1]](self.a_l_ij[layerNo][0][j],'SINGLE') * self.a_l_ij[layerNo-1][0][i]

        #multiply both by dL/da_l to get dL/da_l-1 and dL/dw_l
        #print(layerNo)
        # print(da_1da)
        # print()
        self.delta_l_ij[layerNo] = self.delta_l_ij[layerNo+1] @ da_1da

        #print("AAA:",da_l1_dw_l,"\n", self.delta_l_ij[layerNo+1])
        temp = np.zeros(self.w_l_ij[layerNo].shape)
        for i in range(temp.shape[0]):
            for j in range(temp.shape[1]):
                temp[i][j] = da_l1_dw_l[i][j] * self.delta_l_ij[layerNo+1][0][j]

        self.dl_dw.append(temp)
        self.dl_db.append(self.delta_l_ij[layerNo+1])
    
    def forward_layer_softmax(self,layerNo):
        if layerNo < 1: 
            print("ERRORRR")
        denom = np.sum( np.exp(self.a_l_ij[layerNo-1]) )
        output = np.exp(self.a_l_ij[layerNo-1]) / denom
        self.a_l_ij.append(output)

    def backward_layer_softmax(self,layerNo):
        pass


    def forward_layer_loss(self,layerNo,y):
        y_hat  =  self.a_l_ij[-1]
        lossFunction = self.layer_info[layerNo][1]
        return lossFunction(y_hat,y)

    def forwardProp(self,x,y):
        self.a_l_ij = []
        self.a_l_ij.append(x)
        for layerNo in range(len(self.layer_info)):
            if self.layer_info[layerNo][0] == 'Dense':
                self.forward_layer_dense(layerNo)
            if self.layer_info[layerNo][0] == 'Softmax':
                self.forward_layer_softmax(layerNo)
            if self.layer_info[layerNo][0] == 'Loss':
                cost = self.forward_layer_loss(layerNo,y)
                return cost
            
    #Handle softmax backpropogation
    def backwardProp(self,y):
        self.dl_dw = []
        self.dl_db = []
        #-1 for getting index, -1 for loss layer
        lastIndex = len(self.layer_info) - 2
        self.delta_l_ij = [i for i in range(0,lastIndex+1)]
        
        #MSE LOSS And Dense
        if(self.layer_info[-1][1] == ANN.loss_MSE):
            self.delta_l_ij[lastIndex] = 2 * ( (self.a_l_ij[lastIndex] - y) @ self.w_l_ij[lastIndex].T) 
            self.dl_dw.append(2*(self.a_l_ij[lastIndex-1].T @ self.a_l_ij[lastIndex] - y))
            self.dl_db.append(2*(self.a_l_ij[lastIndex] - y))
        elif(self.layer_info[-1][1] == ANN.loss_crossEntropy):
            self.dl_dw.append(['None'])
            self.delta_l_ij[lastIndex] = self.a_l_ij[lastIndex] - y 
             
        #BackPropogation
        for layerNo in range(lastIndex-1,0,-1):
            if self.layer_info[layerNo][0] == 'Dense':
                self.backward_layer_dense(layerNo)
                
        self.dl_dw.append(['None'])
        self.dl_db.append(['None'])
        self.dl_dw.reverse()
        self.dl_db.reverse()


            
    def fit(self,x,y,eta,epoch = 1000):
        #Hyperparameters if required: 
        tolerance = 1e-5
        self.optimizer(self,x,y,eta,epoch,tolerance)

    @staticmethod    
    def optimizer_gradientDescent(Obj,x,y,eta,epoch,tolerance):
        for i in  range(epoch):
            errorSum = 0
            print("Epoch:",i,end="\t")
            for j in range(1):
                #x and y both are 2d matrix
                x_1xd = x[j:j+1]
                y_1xk = y[j:j+1]
                errorSum += Obj.forwardProp(x_1xd,y_1xk)
                Obj.backwardProp(y_1xk)
                for index in range(1,len(Obj.w_l_ij)):
                    if( 'None' in Obj.dl_dw[index]):
                        continue
                    Obj.w_l_ij[index] = Obj.w_l_ij[index] - (eta * Obj.dl_dw[index])
                    Obj.b_l_j[index] = Obj.b_l_j[index] - (eta * Obj.dl_db[index] )
            print("Error:",errorSum)
            if(errorSum < tolerance):
                return

    @staticmethod
    def loss_MSE(P,y):
        return np.sum((P - y)**2)   

    @staticmethod
    def loss_crossEntropy(P,y):
        loss = np.log(P ** y)
        return -np.sum(loss)
      
    @staticmethod
    def activation_linear(z):
        return z
    
    @staticmethod
    def diff_activation_linear(x,flg='None'):
        if(flg != 'SINGLE'):
            return np.identity(x.shape[1])
        else:
            return x
    
    @staticmethod
    def activation_sigmoid(z):    
        return 1 / (1 + np.exp(-z))
    
    @staticmethod    
    def diff_activation_sigmoid(a,flg='None'):
        if(flg !='SINGLE'):
            x = a*(1-a)
            return np.diag(x[0])
        else:
            return a * (1- a)

    @staticmethod
    def activation_tanh(z):
        sinh = np.exp(z) - np.exp(-z)
        cosh = np.exp(z) + np.exp(-z)
        return sinh / cosh
    
    @staticmethod
    def diff_activation_tanh(z,flg='None'):
        if(flg != 'SINGLE'):
            x = 1 - (z*z)
            return np.diag(x[0])
        else:
            return 1 - (z * z)


In [52]:
import pandas as pd
import numpy as np
import sklearn.datasets as sk

In [53]:
dictt = sk.fetch_california_housing()
x = dictt.data
y = np.array(dictt.target)
y = y.T
x

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [54]:
from sklearn.preprocessing import Normalizer
transformer = Normalizer().fit(x)
x = transformer.transform(x)

### Q2 (a)

In [55]:
model = ANN(x.shape,42)
model.addLayers_Dense(1,ANN.activation_linear)
model.compile(ANN.optimizer_gradientDescent,ANN.loss_MSE)
model.fit(x,y,1e-2)

Epoch: 0	Error: 28.063339827643507
Epoch: 1	Error: 25.999320881483996
Epoch: 2	Error: 24.09202540353288
Epoch: 3	Error: 22.329370098569445
Epoch: 4	Error: 20.70021084873079
Epoch: 5	Error: 19.194269408126527
Epoch: 6	Error: 17.802065831550784
Epoch: 7	Error: 16.514856188260325
Epoch: 8	Error: 15.324575146970316
Epoch: 9	Error: 14.223783050645254
Epoch: 10	Error: 13.20561712954678
Epoch: 11	Error: 12.263746528542141
Epoch: 12	Error: 11.392330850060791
Epoch: 13	Error: 10.585981937480726
Epoch: 14	Error: 9.83972864528695
Epoch: 15	Error: 9.148984362215574
Epoch: 16	Error: 8.509517071910828
Epoch: 17	Error: 7.9174217525008554
Epoch: 18	Error: 7.369094932054112
Epoch: 19	Error: 6.861211231215018
Epoch: 20	Error: 6.390701737530949
Epoch: 21	Error: 5.954734068160585
Epoch: 22	Error: 5.5506939888773745
Epoch: 23	Error: 5.176168467626142
Epoch: 24	Error: 4.828930050424535
Epoch: 25	Error: 4.506922456187723
Epoch: 26	Error: 4.208247295153016
Epoch: 27	Error: 3.931151823044813
Epoch: 28	Error: 3

### Q2 (b)

In [56]:
model = ANN(x.shape,42)
model.addLayers_Dense(13,ANN.activation_sigmoid)
model.addLayers_Dense(1,ANN.activation_linear)
model.compile(ANN.optimizer_gradientDescent,ANN.loss_MSE)
model.fit(x,y,1e-2)

Epoch: 0	Error: 19.1704730793367
Epoch: 1	Error: 11.238047548406872
Epoch: 2	Error: 6.2777618366948555
Epoch: 3	Error: 3.2894019658702875
Epoch: 4	Error: 1.5709335726665759
Epoch: 5	Error: 0.6446522759101476
Epoch: 6	Error: 0.19612946234038167
Epoch: 7	Error: 0.024502694781255004
Epoch: 8	Error: 0.00492067343693741
Epoch: 9	Error: 0.061895914667486354
Epoch: 10	Error: 0.1512212192700418
Epoch: 11	Error: 0.248078677922689
Epoch: 12	Error: 0.33940277293241045
Epoch: 13	Error: 0.41906943279167547
Epoch: 14	Error: 0.48491868031457697
Epoch: 15	Error: 0.5369457890912772
Epoch: 16	Error: 0.5762258212833854
Epoch: 17	Error: 0.6042917931614373
Epoch: 18	Error: 0.6227890847573675
Epoch: 19	Error: 0.633294989312751
Epoch: 20	Error: 0.6372346499240128
Epoch: 21	Error: 0.6358514014367589
Epoch: 22	Error: 0.6302062911825059
Epoch: 23	Error: 0.6211919348373245
Epoch: 24	Error: 0.6095522254147265
Epoch: 25	Error: 0.5959032578434055
Epoch: 26	Error: 0.5807531125674154
Epoch: 27	Error: 0.56451946028289

### Q2 (c)

In [57]:
model = ANN(x.shape,42)
model.addLayers_Dense(13,ANN.activation_sigmoid)
model.addLayers_Dense(13,ANN.activation_sigmoid)
model.addLayers_Dense(1,ANN.activation_linear)
model.compile(ANN.optimizer_gradientDescent,ANN.loss_MSE)
model.fit(x,y,1e-1)

Epoch: 0	Error: 14.657749435459955
Epoch: 1	Error: 34.885464423919785
Epoch: 2	Error: 42.86621722386456
Epoch: 3	Error: 9.584090368934318
Epoch: 4	Error: 4.868305880668432
Epoch: 5	Error: 4.579586089736496
Epoch: 6	Error: 2.816562201437622
Epoch: 7	Error: 5.326346299041565
Epoch: 8	Error: 4.3403176288650025
Epoch: 9	Error: 6.051498279784044
Epoch: 10	Error: 5.594512741588135
Epoch: 11	Error: 8.493104132478917
Epoch: 12	Error: 9.296130478857027
Epoch: 13	Error: 4.449569596830767
Epoch: 14	Error: 7.189243578396722
Epoch: 15	Error: 31.882971312593426
Epoch: 16	Error: 12.80385946706105
Epoch: 17	Error: 3.2943431131506924
Epoch: 18	Error: 0.6725131171566218
Epoch: 19	Error: 0.7841884023908348
Epoch: 20	Error: 0.7512446349166233
Epoch: 21	Error: 0.5281507062688716
Epoch: 22	Error: 0.5570278748067041
Epoch: 23	Error: 0.8675149811876233
Epoch: 24	Error: 0.7457601700133119
Epoch: 25	Error: 0.3829767507809251
Epoch: 26	Error: 0.45021803424181694
Epoch: 27	Error: 1.1212655940839038
Epoch: 28	Erro

  return 1 / (1 + np.exp(-z))


 150.3512793847866
Epoch: 295	Error: 96.22481880626341
Epoch: 296	Error: 61.583884036008584
Epoch: 297	Error: 39.413685783045494
Epoch: 298	Error: 25.224758901149112
Epoch: 299	Error: 16.14384569673543
Epoch: 300	Error: 53402.54392356676
Epoch: 301	Error: 2443.5555722237186
Epoch: 302	Error: 1563.8755662231797
Epoch: 303	Error: 1000.8803623828348
Epoch: 304	Error: 640.5634319250144
Epoch: 305	Error: 409.96059643200914
Epoch: 306	Error: 262.37478171648587
Epoch: 307	Error: 167.91986029855096
Epoch: 308	Error: 107.46871059107261
Epoch: 309	Error: 68.77997477828647
Epoch: 310	Error: 44.01918385810334
Epoch: 311	Error: 28.172277669186133
Epoch: 312	Error: 18.030257708279123
Epoch: 313	Error: 11.53936493329864
Epoch: 314	Error: 7.38519355731113
Epoch: 315	Error: 4.726523859429558
Epoch: 316	Error: 58716.524981313734
Epoch: 317	Error: 2520.264045148729
Epoch: 318	Error: 1612.9689888951862
Epoch: 319	Error: 1032.300152892919
Epoch: 320	Error: 660.6720978514684
Epoch: 321	Error: 422.8301426249

### Q3

In [58]:
dictt = sk.load_digits()
x = dictt.data
y = dictt.target
x

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [59]:
def oneHot_encoding(categories ,labels):
    num_samples = len(labels)
    num_categories = len(categories)
    one_hot = np.zeros((num_samples, num_categories))

    # Perform one-hot encoding
    for i, label in enumerate(labels):
        index = categories.index(label)
        one_hot[i, index] = 1
    return one_hot

In [60]:
y_encoded = oneHot_encoding([0,1,2,3,4,5,6,7,8,9],y)
y_encoded.shape

(1797, 10)

### Q3 (a)

In [61]:
model = ANN(x.shape,42)
model.addLayers_Dense(89, ANN.activation_tanh)
model.addLayers_Dense(10, ANN.activation_sigmoid)
model.compile(ANN.optimizer_gradientDescent, ANN.loss_MSE)
model.fit(x, y_encoded, 1e-3)

Epoch: 0	Error: 3.3003418872043464
Epoch: 1	Error: 3.585484809898462
Epoch: 2	Error: 3.4976923484569724
Epoch: 3	Error: 3.402876417020222
Epoch: 4	Error: 3.2981691555730803
Epoch: 5	Error: 3.1660248775951625
Epoch: 6	Error: 2.8980193793341975
Epoch: 7	Error: 3.0127156364746
Epoch: 8	Error: 2.9029210128355385
Epoch: 9	Error: 2.7917478529172786
Epoch: 10	Error: 2.680271676146691
Epoch: 11	Error: 2.5695494093408695
Epoch: 12	Error: 2.4605818065086447
Epoch: 13	Error: 2.3542824848832455
Epoch: 14	Error: 2.251453820158847
Epoch: 15	Error: 2.152770016905346
Epoch: 16	Error: 2.0587683175266767
Epoch: 17	Error: 1.9698500062870843
Epoch: 18	Error: 1.8862928773961942
Epoch: 19	Error: 1.8082751282367493
Epoch: 20	Error: 1.73590572916896
Epoch: 21	Error: 1.669247441077273
Epoch: 22	Error: 1.6083103961761227
Epoch: 23	Error: 1.5530019408665374
Epoch: 24	Error: 1.5030564352547007
Epoch: 25	Error: 1.458009595513542
Epoch: 26	Error: 1.4172595949278128
Epoch: 27	Error: 1.380176105370837
Epoch: 28	Error

### Q3 (b)

In [62]:
model = ANN(x.shape,42)
model.addLayers_Dense(89,ANN.activation_tanh)
model.addLayers_Dense(10,ANN.activation_linear)
model.addLayers_softmax()
model.compile(ANN.optimizer_gradientDescent,ANN.loss_crossEntropy)
model.fit(x,y_encoded,1e-3,100)

Epoch: 0	Error: 2.51227500075252
Epoch: 1	Error: 1.2988366464338814
Epoch: 2	Error: 0.6238509561011663
Epoch: 3	Error: 0.3549839945162265
Epoch: 4	

Error: 0.2395282228743936
Epoch: 5	Error: 0.17900111775088934
Epoch: 6	Error: 0.14237566698023424
Epoch: 7	Error: 0.11799379342550347
Epoch: 8	Error: 0.10065147398823193
Epoch: 9	Error: 0.08770753325639251
Epoch: 10	Error: 0.07768767284723527
Epoch: 11	Error: 0.06970708741491931
Epoch: 12	Error: 0.06320374547946095
Epoch: 13	Error: 0.057804002195201674
Epoch: 14	Error: 0.05325000589570861
Epoch: 15	Error: 0.04935818761159869
Epoch: 16	Error: 0.045994373427884226
Epoch: 17	Error: 0.04305825428949398
Epoch: 18	Error: 0.040473357719128404
Epoch: 19	Error: 0.03818037778956244
Epoch: 20	Error: 0.03613262257859758
Epoch: 21	Error: 0.03429283531877174
Epoch: 22	Error: 0.03263092942098569
Epoch: 23	Error: 0.03112234523513283
Epoch: 24	Error: 0.029746838367680026
Epoch: 25	Error: 0.028487573011773255
Epoch: 26	Error: 0.027330434408853885
Epoch: 27	Error: 0.026263501103776
Epoch: 28	Error: 0.025276635319806343
Epoch: 29	Error: 0.024361161745011385
Epoch: 30	Error: 0.02350961325841923
Epoch: 31	E