In [84]:
import numpy as np

In [85]:
class ANN:
    #List of List for each layer along with their information
    #Shape: [ [layerType,activationF(),neuronCount],[layerType,activationF(),neuronCount]]
    layer_info = None
    #List of weight matrices of each layer
    #Shape: dxk (where d is no of input)(k is no of output)
    w_l_ij= None
    #List of bias vector of each layer
    #Shape: 1xk (where k is no of output/neuron)
    b_l_j = None
    #List of output of each neuron
    #Shape: 1xd (where d is dimension)
    a_l_ij = None
    #List of d(a_l+1)/da_l (where a_l+1 is output of layer)(a_l is input of layer)
    #Shape: 1xd (same shape as input of layer)
    delta_l_ij = None
    #List of matrices of dl/dw for each layer
    dl_db = None
    dl_dw = None
    #Dictionary for differentiation function for a input function. diff[f()] => df()
    diff = {}
    input_featureCount = None
    input_count = None
    def __init__(self,xShape,rseed = 10,listLayer= None):
        np.random.seed(rseed)
        inputCount,inputDim = xShape
        self.input_featureCount = inputDim
        self.input_count = inputCount
        self.layer_info = [['None','None',self.input_featureCount]]
        self.w_l_ij = ['None']
        self.b_l_j = ['None']
        self.optimizer = None
        self.diff[ANN.activation_linear] = ANN.diff_activation_linear
        self.diff[ANN.activation_sigmoid] = ANN.diff_activation_sigmoid
        self.diff[ANN.activation_tanh] = ANN.diff_activation_tanh

    def compile(self,optimizer,lossFunction):
        self.optimizer = optimizer
        self.layer_info.append(['Loss',lossFunction])

    def __str__(self):
        return

    def addLayers_Dense(self,neuronCount,activationFunction):
            self.layer_info.append(['Dense',activationFunction,neuronCount])
            w = np.random.rand(self.layer_info[-2][2],neuronCount)
            b = np.random.rand(1,neuronCount)
            self.w_l_ij.append(w)
            self.b_l_j.append(b)

    def addLayers_softmax(self):
        self.layer_info.append(['Softmax',self.layer_info[-1][2]])

    def forward_layer_dense(self,layerNo):
        if layerNo < 1:
            print("ERRRROR")
        
        #Output = activationFunction(a_l-1 @ w_l + b_l)
        output = self.layer_info[layerNo][1](self.a_l_ij[layerNo-1] @ self.w_l_ij[layerNo] + self.b_l_j[layerNo]) 
        self.a_l_ij.append(output)
    
    def backward_layer_dense(self,layerNo):
        #at each layer we need to calculate da_l/da_l-1, and da_l/dw_l

        #Shape: dxk
        da_1da = self.w_l_ij[layerNo]
        #Shape: dx1
        da_l_1dw_l = self.a_l_ij[layerNo-1].T

        #multiply both by dL/da_l to get dL/da_l-1 and dL/dw_l
        self.delta_l_ij[layerNo] = self.delta_l_ij[layerNo+1] @ da_1da.T 
        self.dl_dw.append(da_l_1dw_l @ self.delta_l_ij[layerNo+1])
        self.dl_db.append(self.delta_l_ij[layerNo+1])
    
    def forward_layer_softmax(self,layerNo):
        if layerNo < 1: 
            print("ERRORRR")
        denom = np.sum( np.exp(self.a_l_ij[layerNo-1]) )
        output = np.exp(self.a_l_ij[layerNo]) / denom
        self.a_l_ij.append(output)

    def backward_layer_softmax(self,layerNo):
        pass

    #Create cross entropy loss 
    def forward_layer_loss(self,layerNo,y):
        y_hat  =  self.a_l_ij[-1]
        lossFunction = self.layer_info[layerNo][1]
        return lossFunction(y_hat,y)

    def forwardProp(self,x,y):
        self.a_l_ij = []
        self.a_l_ij.append(x)
        for layerNo in range(len(self.layer_info)):
            if self.layer_info[layerNo][0] == 'Dense':
                self.forward_layer_dense(layerNo)
            if self.layer_info[layerNo][0] == 'Softmax':
                self.forward_layer_softmax(layerNo)
            if self.layer_info[layerNo][0] == 'Loss':
                cost = self.forward_layer_loss(layerNo,y)
                return cost
            
    #Handle softmax backpropogation
    def backwardProp(self,y):
        self.dl_dw = []
        self.dl_db = []
        lastIndex = len(self.layer_info) - 2
        self.delta_l_ij = [i for i in range(0,lastIndex+1)]
        
        #MSE LOSS And Dense
        self.delta_l_ij[lastIndex] = 2 * ( (self.a_l_ij[lastIndex] - y) @ self.w_l_ij[lastIndex].T) 
        self.dl_dw.append(self.a_l_ij[lastIndex-1].T @ (self.a_l_ij[lastIndex] - y))
        self.dl_db.append(2*(self.a_l_ij[lastIndex] - y))
        #BackPropogation
        for layerNo in range(lastIndex-1,0,-1):
            if self.layer_info[layerNo][0] == 'Dense':
                self.backward_layer_dense(layerNo)
            if self.layer_info[layerNo][0] == 'Softmax':
                self.backward_layer_softmax(layerNo)
                
        self.dl_dw.append(['None'])
        self.dl_db.append(['None'])
        self.dl_dw.reverse()
        self.dl_db.reverse()


            
    def fit(self,x,y):
        #Hyperparameters if required: 
        epoch = 10
        eta = 1e-6
        self.optimizer(self,x,y,eta,epoch)

    @staticmethod    
    def optimizer_gradientDescent(Obj,x,y,eta,epoch):
        for i in  range(epoch):
            errorSum = 0
            print("Epoch:",i,end="\t[")
            for j in range(y.shape[0]):
                #x and y both are 2d matrix
                x_1xd = x[j:j+1]
                y_1xk = y[j:j+1]
                errorSum += Obj.forwardProp(x_1xd,y_1xk)
                Obj.backwardProp(y_1xk)
                for index in range(1,len(Obj.w_l_ij)):
                    Obj.w_l_ij[index] = Obj.w_l_ij[index] - (eta * Obj.dl_dw[index])
                    Obj.b_l_j[index] = Obj.b_l_j[index] - (eta * Obj.dl_db[index] )
            print("]", end='\t')
            print("Error:",errorSum)

    @staticmethod
    def loss_MSE(P,y):
        return np.sum((P - y)**2)   
      
    @staticmethod
    def activation_linear(z):
        return z
    
    @staticmethod
    def diff_activation_linear(x):
        return np.ones(x.shape)
    
    @staticmethod
    def activation_sigmoid(z):    
        return 1 / (1 + np.exp(-z))
    
    @staticmethod    
    def diff_activation_sigmoid(a):
        return a * (1- a)

    @staticmethod
    def activation_tanh(z):
        sinh = np.exp(z) - np.exp(-z)
        cosh = np.exp(z) + np.exp(-z)
        return sinh / cosh
    
    @staticmethod
    def diff_activation_tanh(z):
        return 1 - (z * z)

    @staticmethod
    def activation_relu(z):
        pass

In [86]:
import pandas as pd
import numpy as np
import sklearn.datasets as sk

In [87]:
dictt = sk.fetch_california_housing()
x = dictt.data
y = np.array(dictt.target)
y = y.T
x

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [88]:
from sklearn.preprocessing import Normalizer
transformer = Normalizer().fit(x)
x = transformer.transform(x)
x

array([[ 0.0238481 ,  0.1174473 ,  0.02000651, ...,  0.00732056,
         0.10850985, -0.3501362 ],
       [ 0.00345241,  0.00873354,  0.00259434, ...,  0.00087745,
         0.01574533, -0.05082923],
       [ 0.01409202,  0.10097076,  0.01609345, ...,  0.00544128,
         0.07349506, -0.23735895],
       ...,
       [ 0.00167455,  0.01674553,  0.00512762, ...,  0.00229082,
         0.03883978, -0.11940547],
       [ 0.00248251,  0.02393168,  0.00708579, ...,  0.00282289,
         0.05242368, -0.16129955],
       [ 0.00171478,  0.0114864 ,  0.00377236, ...,  0.00187873,
         0.02826371, -0.08703817]])

In [89]:
model = ANN(x.shape,20)

In [90]:
model.addLayers_Dense(4,ANN.activation_linear)
model.addLayers_Dense(13,ANN.activation_linear)
model.addLayers_Dense(1,ANN.activation_linear)

In [91]:
model.compile(ANN.optimizer_gradientDescent,ANN.loss_MSE)

In [92]:
model.layer_info

[['None', 'None', 8],
 ['Dense', <function __main__.ANN.activation_linear(z)>, 4],
 ['Dense', <function __main__.ANN.activation_linear(z)>, 13],
 ['Dense', <function __main__.ANN.activation_linear(z)>, 1],
 ['Loss', <function __main__.ANN.loss_MSE(P, y)>]]

In [93]:
import copy
w_old = copy.deepcopy(model.w_l_ij)
b_old = copy.deepcopy(model.b_l_j)
dldw = copy.deepcopy(model.dl_dw)

In [94]:
model.fit(x,y)

Epoch: 0	[

]	Error: 366688.9119047099
Epoch: 1	[]	Error: 31480.999319046063
Epoch: 2	[]	Error: 28374.784701982655
Epoch: 3	[]	Error: 28224.191505985633
Epoch: 4	[]	Error: 28166.07273035525
Epoch: 5	[]	Error: 28117.240705890184
Epoch: 6	[]	Error: 28071.190699136645
Epoch: 7	[]	Error: 28027.126672421728
Epoch: 8	[]	Error: 27984.879193785553
Epoch: 9	[]	Error: 27944.355682917587


In [95]:
model.w_l_ij

['None',
 array([[ 0.5892334 ,  0.89892337,  0.89271579,  0.81687445],
        [ 0.03109845,  0.68585777,  0.37241143,  0.51348419],
        [ 0.65724609,  0.19296765,  0.27136908,  0.7178546 ],
        [ 0.7827302 ,  0.8499998 ,  0.77490251,  0.03638461],
        [-0.23760754,  0.33590743, -0.18794221, -0.09996116],
        [ 0.85690945,  0.94892526,  0.5607987 ,  0.17805196],
        [ 0.76416856,  0.48496005,  0.62341497,  0.83317238],
        [ 0.47944692,  0.52052026,  0.70334518,  0.67002714]]),
 array([[0.32808148, 0.50952174, 0.26204796, 0.30903368, 0.62485426,
         0.55644286, 0.31705974, 0.39359329, 0.25639396, 0.58029745,
         0.16053744, 0.59605947, 0.82478461],
        [0.13625614, 0.71621244, 0.3872589 , 0.75898133, 0.75802996,
         0.76626419, 0.57423962, 0.64270821, 0.62530774, 0.39079684,
         0.50545953, 0.46324134, 0.02475277],
        [0.68021579, 0.59541138, 0.74832178, 0.43224367, 0.31592862,
         0.26440903, 0.53241453, 0.05399259, 0.60196876,

In [96]:
w_old

['None',
 array([[0.5881308 , 0.89771373, 0.89153073, 0.81583748],
        [0.03588959, 0.69175758, 0.37868094, 0.51851095],
        [0.65795147, 0.19385022, 0.2723164 , 0.71860593],
        [0.78300361, 0.85032764, 0.77524489, 0.03666431],
        [0.11669374, 0.7512807 , 0.23921822, 0.25480601],
        [0.85762553, 0.94977903, 0.56168686, 0.17878052],
        [0.77025193, 0.49238104, 0.63125307, 0.83949792],
        [0.4610394 , 0.49794007, 0.67941112, 0.65078591]]),
 array([[0.32920641, 0.51064106, 0.26362883, 0.31051155, 0.62685344,
         0.55744981, 0.31857956, 0.39484322, 0.25797459, 0.58224112,
         0.16162871, 0.59813382, 0.82582358],
        [0.15639172, 0.73430052, 0.40864343, 0.7786879 , 0.80397057,
         0.78607144, 0.59228702, 0.6644892 , 0.64656729, 0.42563648,
         0.51356833, 0.50125784, 0.03708381],
        [0.7081161 , 0.6204306 , 0.77780853, 0.45940947, 0.37980555,
         0.2918922 , 0.55722886, 0.0841636 , 0.63128167, 0.94457049,
         0.89123753

In [97]:
model.b_l_j

['None',
 array([[-0.07904818, -0.34092588,  0.35129059,  0.13232068]]),
 array([[ 0.05755212,  0.07234864,  0.70340903,  0.790923  , -0.08024858,
          0.07193192,  0.84753706,  0.46102015,  0.8726238 ,  0.49460246,
          0.75484849,  0.32801989,  0.79576466]]),
 array([[0.81965494]])]

In [98]:
b_old

['None',
 array([[0.26879524, 0.06732467, 0.77144514, 0.48098413]]),
 array([[0.10760726, 0.11700177, 0.7555236 , 0.83889446, 0.03553703,
         0.12144377, 0.89100806, 0.51509223, 0.92441901, 0.58124263,
         0.77325639, 0.42267689, 0.82546475]]),
 array([[0.96267414]])]

In [99]:
model.dl_dw

[['None'],
 array([[ 9.78929005e-03,  1.11279425e-02,  1.12054356e-02,
          9.52499771e-03],
        [ 6.55734074e-02,  7.45403498e-02,  7.50594361e-02,
          6.38030492e-02],
        [ 2.15356061e-02,  2.44805276e-02,  2.46510058e-02,
          2.09541854e-02],
        [ 4.76335129e-03,  5.41472352e-03,  5.45243073e-03,
          4.63474980e-03],
        [ 5.68439475e+00,  6.46171657e+00,  6.50671486e+00,
          5.53092683e+00],
        [ 1.07252731e-02,  1.21919181e-02,  1.22768205e-02,
          1.04357110e-02],
        [ 1.61351566e-01,  1.83415848e-01,  1.84693125e-01,
          1.56995378e-01],
        [-4.96882494e-01, -5.64829500e-01, -5.68762877e-01,
         -4.83467606e-01]]),
 array([[-0.27374504, -0.24015297, -0.26372317, -0.24119421, -0.65923413,
         -0.2758531 , -0.2105175 , -0.29206457, -0.26047706, -0.47116475,
         -0.06793692, -0.51841547, -0.1441026 ],
        [-0.02109217, -0.01850389, -0.02031998, -0.01858411, -0.05079426,
         -0.0212546 

In [100]:
dldw

In [101]:
# self.delta_l_ij = [i for i in range(0,lastIndex+1)]
# self.delta_l_ij[lastIndex] = (2 * (self.a_l_ij[lastIndex] - y).T * self.diff[self.layer_info[lastIndex][1]](self.a_l_ij[lastIndex]))
# self.dl_dw.append(self.a_l_ij[lastIndex-1].T @ self.delta_l_ij[lastIndex])
# for layerNo in range(lastIndex-1,0,-1):
#     self.delta_l_ij[layerNo] = (self.w_l_ij[layerNo+1] @self.delta_l_ij[layerNo+1].T).T * self.diff[self.layer_info[layerNo][1]](self.a_l_ij[layerNo])
#     self.dl_dw.append(self.a_l_ij[layerNo-1].T @ self.delta_l_ij[layerNo])
# self.dl_dw.append(['None'])
# self.dl_dw.reverse()

In [102]:
        # self.delta_l_ij = [i for i in range(0,lastIndex+1)]
        # self.delta_l_ij[lastIndex] = (2 * (self.a_l_ij[lastIndex] - y).T * self.diff[self.layer_info[lastIndex][1]](self.a_l_ij[lastIndex]))
        # self.dl_dw.append(self.a_l_ij[lastIndex-1].T @ self.delta_l_ij[lastIndex])
        # self.dl_db.append(self.a_l_ij[lastIndex])
        # for layerNo in range(lastIndex-1,0,-1):
        #     self.delta_l_ij[layerNo] = (self.w_l_ij[layerNo+1] @self.delta_l_ij[layerNo+1].T).T * self.diff[self.layer_info[layerNo][1]](self.a_l_ij[layerNo])
        #     self.dl_dw.append(self.a_l_ij[layerNo-1].T @ self.delta_l_ij[layerNo])
        #     self.dl_db.append(self.delta_l_ij[layerNo])
        # self.dl_dw.append(['None'])
        # self.dl_db.append(['None'])
        # self.dl_dw.reverse()
        # self.dl_dw.reverse()
        