In [1]:
import numpy as np

In [2]:
class ANN:
    #List of List for each layer along with their information
    #Shape: [ [layerType,activationF(),neuronCount],[layerType,activationF(),neuronCount]]
    layer_info = None
    #List of weight matrices of each layer
    #Shape: dxk (where d is no of input)(k is no of output)
    w_l_ij= None
    #List of bias vector of each layer
    #Shape: 1xk (where k is no of output/neuron)
    b_l_j = None
    #List of output of each neuron
    #Shape: 1xd (where d is dimension)
    a_l_ij = None
    #List of d(a_l+1)/da_l (where a_l+1 is output of layer)(a_l is input of layer)
    #Shape: 1xd (same shape as input of layer)
    delta_l_ij = None
    #List of matrices of dl/dw for each layer
    dl_db = None
    dl_dw = None
    #Dictionary for differentiation function for a input function. diff[f()] => df()
    diff = {}
    input_featureCount = None
    input_count = None
    da_da = None
    def __init__(self,xShape,rseed = 45,listLayer= None):
        np.random.seed(rseed)
        inputCount,inputDim = xShape
        self.input_featureCount = inputDim
        self.input_count = inputCount
        self.layer_info = [['None','None',self.input_featureCount]]
        self.w_l_ij = ['None']
        self.b_l_j = ['None']
        self.optimizer = None
        self.da_da = []
        self.diff[ANN.activation_linear] = ANN.diff_activation_linear
        self.diff[ANN.activation_sigmoid] = ANN.diff_activation_sigmoid
        self.diff[ANN.activation_tanh] = ANN.diff_activation_tanh

    def compile(self,optimizer,lossFunction):
        self.optimizer = optimizer
        self.layer_info.append(['Loss',lossFunction])

    def __str__(self):
        return

    def addLayers_Dense(self,neuronCount,activationFunction):
            self.layer_info.append(['Dense',activationFunction,neuronCount])
            w = np.random.normal(0,10,(self.layer_info[-2][2],neuronCount))
            b = np.random.normal(0,10,(1,neuronCount))
            self.w_l_ij.append(w)
            self.b_l_j.append(b)

    def addLayers_softmax(self):
        self.layer_info.append(['Softmax',self.layer_info[-1][2]])
        self.w_l_ij.append('None')
        self.b_l_j.append('None')

    def forward_layer_dense(self,layerNo):
        if layerNo < 1:
            print("ERRRROR")
        #Output = activationFunction(a_l-1 @ w_l + b_l)
        output = self.layer_info[layerNo][1](self.a_l_ij[layerNo-1] @ self.w_l_ij[layerNo] + self.b_l_j[layerNo]) 
        self.a_l_ij.append(output)
    
    def backward_layer_dense(self,layerNo):
        #at each layer we need to calculate da_l/da_l-1, and da_l/dw_l
        #Shape: dxk
        da_1da = self.diff[self.layer_info[layerNo][1]](self.a_l_ij[layerNo]).T @ self.w_l_ij[layerNo].T
        self.da_da.append(da_1da)
        #Shape: dxk
        da_l1_dw_l = np.zeros(self.w_l_ij[layerNo].shape)
        for i in range(da_l1_dw_l.shape[0]):
            for j in range(da_l1_dw_l.shape[1]):
                da_l1_dw_l[i][j] = self.diff[self.layer_info[layerNo][1]](self.a_l_ij[layerNo][0][j],'SINGLE') * self.a_l_ij[layerNo-1][0][i]

        #multiply both by dL/da_l to get dL/da_l-1 and dL/dw_l
        #print(layerNo)
        #print(da_1da)
        #print()
        self.delta_l_ij[layerNo] = self.delta_l_ij[layerNo+1] @ da_1da
        #print("AAA:",da_l1_dw_l,"\n", self.delta_l_ij[layerNo+1])
        temp = np.zeros(self.w_l_ij[layerNo].shape)
        for i in range(temp.shape[0]):
            for j in range(temp.shape[1]):
                temp[i][j] = da_l1_dw_l[i][j] * self.delta_l_ij[layerNo+1][0][j]

        self.dl_dw.append(temp)
        self.dl_db.append(self.delta_l_ij[layerNo+1])
    
    def forward_layer_softmax(self,layerNo):
        if layerNo < 1: 
            print("ERRORRR")
        denom = np.sum( np.exp(self.a_l_ij[layerNo-1]) )
        output = np.exp(self.a_l_ij[layerNo-1]) / denom
        self.a_l_ij.append(output)

    def backward_layer_softmax(self,layerNo):
        pass


    def forward_layer_loss(self,layerNo,y):
        y_hat  =  self.a_l_ij[-1]
        lossFunction = self.layer_info[layerNo][1]
        return lossFunction(y_hat,y)

    def forwardProp(self,x,y):
        self.a_l_ij = []
        self.a_l_ij.append(x)
        for layerNo in range(len(self.layer_info)):
            if self.layer_info[layerNo][0] == 'Dense':
                self.forward_layer_dense(layerNo)
            if self.layer_info[layerNo][0] == 'Softmax':
                self.forward_layer_softmax(layerNo)
            if self.layer_info[layerNo][0] == 'Loss':
                cost = self.forward_layer_loss(layerNo,y)
                return cost
            
    #Handle softmax backpropogation
    def backwardProp(self,y):
        self.dl_dw = []
        self.dl_db = []
        #-1 for getting index, -1 for loss layer
        lastIndex = len(self.layer_info) - 2
        self.delta_l_ij = [i for i in range(0,lastIndex+1)]
        
        #MSE LOSS And Dense
        if(self.layer_info[-1][1] == ANN.loss_MSE):
            self.delta_l_ij[lastIndex] = 2 * ( (self.a_l_ij[lastIndex] - y) @ self.w_l_ij[lastIndex].T) 
            self.dl_dw.append(2*(self.a_l_ij[lastIndex-1].T @ self.a_l_ij[lastIndex] - y))
            self.dl_db.append(2*(self.a_l_ij[lastIndex] - y))
        elif(self.layer_info[-1][1] == ANN.loss_crossEntropy):
            self.dl_dw.append(['None'])
            self.delta_l_ij[lastIndex] = self.a_l_ij[lastIndex] - y 
             
        #BackPropogation
        for layerNo in range(lastIndex-1,0,-1):
            if self.layer_info[layerNo][0] == 'Dense':
                self.backward_layer_dense(layerNo)
                
        self.dl_dw.append(['None'])
        self.dl_db.append(['None'])
        self.dl_dw.reverse()
        self.dl_db.reverse()

    def fit(self,x,y,eta):
        #Hyperparameters if required: 
        epoch = 1000
        tolerance = 1e-5
        self.optimizer(self,x,y,eta,epoch,tolerance)

    @staticmethod    
    def optimizer_gradientDescent(Obj,x,y,eta,epoch,tolerance):
        for i in  range(epoch):
            errorSum = 0
            # print("Epoch:",i,end="\t")
            for j in range(1):
                #x and y both are 2d matrix
                x_1xd = x[j:j+1]
                y_1xk = y[j:j+1]
                errorSum += Obj.forwardProp(x_1xd,y_1xk)
                Obj.backwardProp(y_1xk)
                for index in range(1,len(Obj.w_l_ij)):
                    if( 'None' in Obj.dl_dw[index]):
                        continue
                    Obj.w_l_ij[index] = Obj.w_l_ij[index] - (eta * Obj.dl_dw[index])
                    Obj.b_l_j[index] = Obj.b_l_j[index] - (eta * Obj.dl_db[index] )
            # print("Error:",errorSum)
            if(errorSum < tolerance):
                return

    @staticmethod
    def loss_MSE(P,y):
        return np.sum((P - y)**2)   

    @staticmethod
    def loss_crossEntropy(P,y):
        loss = np.log(P ** y)
        return -np.sum(loss)
      
    @staticmethod
    def activation_linear(z):
        return z
    
    @staticmethod
    def diff_activation_linear(x,flg='None'):
        if(flg != 'SINGLE'):
            return np.identity(x.shape[1])
        else:
            return x
    
    @staticmethod
    def activation_sigmoid(z):    
        return 1 / (1 + np.exp(-z))
    
    @staticmethod    
    def diff_activation_sigmoid(a,flg='None'):
        if(flg !='SINGLE'):
            x = a*(1-a)
            return np.diag(x[0])
        else:
            return a * (1- a)

    @staticmethod
    def activation_tanh(z):
        sinh = np.exp(z) - np.exp(-z)
        cosh = np.exp(z) + np.exp(-z)
        return sinh / cosh
    
    @staticmethod
    def diff_activation_tanh(z,flg='None'):
        if(flg != 'SINGLE'):
            x = 1 - (z*z)
            return np.diag(x[0])
        else:
            return 1 - (z * z)


In [3]:
import pandas as pd
import numpy as np
import sklearn.datasets as sk

In [4]:
dictt = sk.fetch_california_housing()
x = dictt.data
y = np.array(dictt.target)
y = y.T
x

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [5]:
from sklearn.preprocessing import Normalizer
transformer = Normalizer().fit(x)
x = transformer.transform(x)

### Q2 (a)

In [6]:
model = ANN(x.shape,42)
model.addLayers_Dense(1,ANN.activation_linear)
model.compile(ANN.optimizer_gradientDescent,ANN.loss_MSE)
model.fit(x,y,1e-2)

In [7]:
model.a_l_ij

[array([[ 0.0238481 ,  0.1174473 ,  0.02000651,  0.00293277,  0.92239103,
          0.00732056,  0.10850985, -0.3501362 ]]),
 array([[4.19180001]])]

### Q2 (b)

In [8]:
model = ANN(x.shape,42)
model.addLayers_Dense(13,ANN.activation_sigmoid)
model.addLayers_Dense(1,ANN.activation_linear)
model.compile(ANN.optimizer_gradientDescent,ANN.loss_MSE)
model.fit(x,y,1e-2)

In [9]:
model.a_l_ij

[array([[ 0.0238481 ,  0.1174473 ,  0.02000651,  0.00293277,  0.92239103,
          0.00732056,  0.10850985, -0.3501362 ]]),
 array([[4.09401803e-08, 9.99991341e-01, 1.00000000e+00, 9.99998831e-01,
         5.73691430e-01, 6.47717916e-02, 6.05984694e-11, 9.99990562e-01,
         6.39842892e-02, 1.00000000e+00, 5.03230576e-07, 1.30622857e-03,
         9.99639474e-01]]),
 array([[4.52697958]])]

### Q2 (c)

In [10]:
model = ANN(x.shape,42)
model.addLayers_Dense(13,ANN.activation_sigmoid)
model.addLayers_Dense(13,ANN.activation_sigmoid)
model.addLayers_Dense(1,ANN.activation_linear)
model.compile(ANN.optimizer_gradientDescent,ANN.loss_MSE)
model.fit(x,y,1e-1)

In [11]:
model.a_l_ij

[array([[ 0.0238481 ,  0.1174473 ,  0.02000651,  0.00293277,  0.92239103,
          0.00732056,  0.10850985, -0.3501362 ]]),
 array([[9.99933009e-01, 9.48199636e-14, 1.00000000e+00, 1.49949124e-51,
         3.32109711e-13, 1.00000000e+00, 3.24557519e-08, 9.99994845e-01,
         9.99999993e-01, 1.00000000e+00, 2.37250354e-32, 1.00000000e+00,
         1.39303035e-63]]),
 array([[7.36336000e-43, 7.10753885e-05, 8.14451596e-24, 6.38091956e-37,
         5.79508715e-38, 9.19352551e-44, 1.17542481e-28, 5.08279735e-49,
         3.69030153e-31, 1.05842797e-31, 2.28821558e-31, 5.48591637e-57,
         5.44663293e-33]]),
 array([[4.52911511]])]

### Q3

In [12]:
dictt = sk.load_digits()
x = dictt.data
y = dictt.target
x


array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [13]:
from sklearn.preprocessing import Normalizer
transformer = Normalizer().fit(x)
x = transformer.transform(x)

In [14]:
def oneHot_encoding(categories ,labels):
    num_samples = len(labels)
    num_categories = len(categories)
    one_hot = np.zeros((num_samples, num_categories))

    # Perform one-hot encoding
    for i, label in enumerate(labels):
        index = categories.index(label)
        one_hot[i, index] = 1
    return one_hot

In [15]:
y_encoded = oneHot_encoding([0,1,2,3,4,5,6,7,8,9],y)
y_encoded.shape

(1797, 10)

### Q3 (a)

In [16]:
model = ANN(x.shape,42)
model.addLayers_Dense(89, ANN.activation_tanh)
model.addLayers_Dense(10, ANN.activation_sigmoid)
model.compile(ANN.optimizer_gradientDescent, ANN.loss_MSE)
model.fit(x, y_encoded, 1e-5)

In [17]:
model.delta_l_ij

[array([[0.        , 0.        , 0.09024036, 0.23462493, 0.16243265,
         0.01804807, 0.        , 0.        , 0.        , 0.        ,
         0.23462493, 0.27072108, 0.18048072, 0.27072108, 0.09024036,
         0.        , 0.        , 0.05414422, 0.27072108, 0.03609614,
         0.        , 0.19852879, 0.14438458, 0.        , 0.        ,
         0.07219229, 0.21657686, 0.        , 0.        , 0.14438458,
         0.14438458, 0.        , 0.        , 0.09024036, 0.14438458,
         0.        , 0.        , 0.16243265, 0.14438458, 0.        ,
         0.        , 0.07219229, 0.19852879, 0.        , 0.01804807,
         0.21657686, 0.1263365 , 0.        , 0.        , 0.03609614,
         0.25267301, 0.09024036, 0.18048072, 0.21657686, 0.        ,
         0.        , 0.        , 0.        , 0.10828843, 0.23462493,
         0.18048072, 0.        , 0.        , 0.        ]]),
 array([[ 0.99988667, -1.        , -0.98022198, -0.99254163,  0.9905437 ,
          0.95507904, -1.        , -1.

In [18]:
model.delta_l_ij

[0,
 array([[ 3.24585506e+02, -1.25915519e+03, -9.27198089e+02,
          2.78172628e+02,  2.95292868e+02,  3.19558649e+02,
         -5.10554928e+02,  6.18847606e+02, -5.47482647e+02,
         -1.30137193e+03, -2.45390644e+02,  8.03143859e+01,
         -6.41288412e+02, -5.16693802e+02, -7.24987319e+02,
         -3.78239265e+02, -1.04368786e+03,  1.32802274e+02,
         -2.81533937e+01, -4.32157379e+02,  1.69300272e+03,
          5.98586577e+02,  6.69003997e+02, -1.07742385e+03,
          7.07579592e+02, -8.77724172e+02,  1.07282964e+02,
          1.51632140e+02,  3.95349027e+02, -2.08609761e+02,
         -6.17028779e+02,  5.60655329e+02,  5.03497748e+02,
          8.51292350e+02,  1.34021732e+03,  7.93485546e+02,
         -4.89079953e+02,  6.44095574e+02, -3.67774430e+02,
         -7.36707457e+02, -5.22669927e+02,  4.14709084e+02,
          1.13076419e+03,  3.28044629e+02, -2.74891681e+02,
         -9.22973356e+02, -1.08240653e+02,  1.85978136e+03,
          4.04017992e+02,  5.3642746

### Q3 (b)

In [19]:
model = ANN(x.shape,42)
model.addLayers_Dense(89,ANN.activation_tanh)
model.addLayers_Dense(10,ANN.activation_linear)
model.addLayers_softmax()
model.compile(ANN.optimizer_gradientDescent,ANN.loss_crossEntropy)
model.fit(x,y_encoded,1e-3)

In [20]:
model.layer_info

[['None', 'None', 64],
 ['Dense', <function __main__.ANN.activation_tanh(z)>, 89],
 ['Dense', <function __main__.ANN.activation_linear(z)>, 10],
 ['Softmax', 10],
 ['Loss', <function __main__.ANN.loss_crossEntropy(P, y)>]]

In [23]:
model.delta_l_ij

[0,
 array([[-2.21606798e-21, -2.67981488e-21,  3.53081971e-23,
         -4.10005786e-21, -1.54493733e-21,  3.06351958e-21,
         -3.01740705e-21, -5.53007366e-21, -4.02057844e-22,
          2.10950172e-21, -3.81263736e-22,  3.01185702e-22,
         -1.34481179e-21, -6.87468616e-21, -7.37657328e-23,
         -6.06722538e-21,  5.05265764e-21,  2.69596834e-21,
         -8.36417868e-21, -2.20745045e-21, -8.75116437e-21,
          3.21212982e-21,  2.35789897e-21, -1.05175408e-21,
         -2.19502474e-21,  3.68873994e-21,  4.79106415e-21,
          3.04176731e-21, -2.64751057e-21,  1.98588536e-21,
         -4.39546805e-21,  1.75588919e-21, -1.60507537e-20,
          9.31840830e-21, -9.86725476e-21, -6.32877223e-22,
         -3.49970596e-22, -4.10124358e-21, -5.30407210e-21,
         -1.16216130e-20,  2.40897005e-22,  7.16325974e-22,
          3.76883553e-21, -2.96202671e-22,  2.31425547e-21,
         -2.66080333e-22, -5.28782773e-21,  5.44544791e-21,
          1.48931527e-21, -1.0409474