<a href="https://colab.research.google.com/github/ram-anand/ram-anand.github.io/blob/main/Multiple_Layer_Neural_Network_Implementation(Forward_and_Back_Propagation_Gradient_Descent).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Custom Neural Network

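A neural network implemented from scratch with NumPy. It provides:
  - multiple fully connected layers
  - selectable loss functions (MSE, MAE and MLE, i.e. the negative log-likelihood / binary cross-entropy)
  - batch gradient descent
  - ReLU, sigmoid and tanh activations
  - a simple structure for forward and back propagation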

In [3]:
import numpy as np
import math

In [8]:
class CustomNeuralNetwork:
    """Fully connected feed-forward network trained with batch gradient descent.

    Layers are appended with :meth:`model_add_layer`; each layer owns a weight
    matrix of shape ``(fan_in, units)``, a bias row of shape ``(1, units)`` and
    an activation name (``"relu"``, ``"sigmoid"`` or ``"tanh"``).
    :meth:`model_feed_forward` caches every layer's output and activation
    derivative so that :meth:`model_training` can back-propagate through them.

    Supported losses (``losstype``):
      * ``"mse"`` - half mean squared error,
      * ``"mae"`` - mean absolute error,
      * ``"mle"`` - negative log-likelihood of Bernoulli labels
        (binary cross-entropy); predictions are clipped into (0, 1).
    """

    # Keeps log() and the division in the "mle" loss away from 0 and 1.
    _EPS = 1e-12

    def __init__(self):
        self.n = 0  # number of samples
        self.m = 0  # number of input variables (features)
        self.epoch = 1  # number of epochs
        self.learning_rate = 0.001
        self.X = None  # training inputs, set by model_training
        self.Y = None  # training labels, set by model_training
        self.YP = 0.0  # output of the last layer from the latest training pass
        self.layer_count = 0
        self.layer_variables = []  # number of units per layer
        self.layer_weights = []
        self.layer_bias = []
        self.layer_activation = []
        self.layer_output = {}  # layer index -> activation output
        self.layer_derivative = {}  # layer index -> activation derivative / delta
        self.training_loss = 0.0
        self.training_derivative = 0.0  # d(loss)/d(prediction), per sample
        self.loss_method = None

    def model_add_layer(self, input_shape, list_layerdepth_layertype_tuple=None):
        """Append layers to the network.

        Args:
            input_shape: ``(number_of_samples, number_of_variables)`` of the
                training data; only the number of variables shapes the weights.
            list_layerdepth_layertype_tuple: iterable of ``(units, activation)``
                pairs, one per layer, in order. Defaults to ``[(1, "relu")]``.
        """
        # A fresh list per call instead of a shared mutable default argument.
        if list_layerdepth_layertype_tuple is None:
            list_layerdepth_layertype_tuple = [(1, "relu")]
        self.n = input_shape[0]
        self.m = input_shape[1]
        for layerdepth, layertype in list_layerdepth_layertype_tuple:
            # The first layer is fed by the inputs, later ones by the previous layer.
            fan_in = self.layer_variables[-1] if self.layer_variables else self.m
            self.layer_weights.append(np.random.rand(fan_in, layerdepth))
            self.layer_bias.append(np.random.rand(1, layerdepth))
            self.layer_variables.append(layerdepth)
            self.layer_activation.append(layertype)
            self.layer_count += 1
        print("total layer count: ", self.layer_count)
        print("layers: ", list_layerdepth_layertype_tuple)

    def relu(self, a):
        """Return ``(max(a, 0), derivative)``; the derivative is 1 where a > 0."""
        return np.maximum(a, 0.0), np.greater(a, 0.0).astype(int)

    def sigmoid(self, a):
        """Return the logistic function ``1 / (1 + exp(-a))`` and its derivative."""
        # Same function written through tanh, which cannot overflow for large |a|.
        b = 0.5 * (1.0 + np.tanh(0.5 * np.asarray(a, dtype=float)))
        return b, b * (1 - b)

    def tanh(self, a):
        """Return ``tanh(a)`` and its derivative ``1 - tanh(a)**2``."""
        # np.tanh saturates to +/-1 instead of producing inf/inf = nan.
        b = np.tanh(a)
        return b, (1 - b ** 2)

    def model_feed_forward(self, data, verbose=True):
        """Propagate ``data`` through every layer and return the last output.

        The output and activation derivative of each layer are cached in
        ``self.layer_output`` / ``self.layer_derivative``.

        Args:
            data: array-like of shape ``(samples, variables)``; a single
                1-D sample is treated as one row.
            verbose: when True (default) print the per-layer trace.

        Returns:
            Output of the last layer with shape ``(samples, units)``, or
            ``None`` when a layer uses an undefined activation name.

        Raises:
            ValueError: if no layer has been added yet.
        """
        if self.layer_count == 0:
            raise ValueError("no layers configured, call model_add_layer first")
        activations = {"relu": self.relu, "sigmoid": self.sigmoid, "tanh": self.tanh}
        if verbose:
            print("input data: \n", data)
        signal = np.atleast_2d(np.asarray(data, dtype=float))
        for layer in range(self.layer_count):
            if verbose:
                print("-------------------------------------------------")
                print("layer: ", layer + 1, " of ", self.layer_count)
            z = np.dot(signal, self.layer_weights[layer]) + self.layer_bias[layer]
            if verbose:
                print("layer sum: \n", z)
            activation = activations.get(self.layer_activation[layer])
            if activation is None:
                print("activation type : ", self.layer_activation[layer], "is undefined, use predefine activations - relu, sigmoid or tanh")
                return None
            # Evaluate the activation once and keep both results.
            signal, slope = activation(z)
            self.layer_output[layer] = signal
            self.layer_derivative[layer] = slope
            if verbose:
                print("layer output: \n", signal)
        return signal

    def _loss_and_gradient(self, target, predicted):
        """Return ``(mean loss, per-sample d(loss)/d(predicted))`` for ``self.loss_method``."""
        target = np.asarray(target, dtype=float)
        samples = predicted.shape[0]
        if self.loss_method == "mle":
            p = np.clip(predicted, self._EPS, 1.0 - self._EPS)
            loss = np.sum(-target * np.log(p) - (1 - target) * np.log(1 - p)) / samples
            gradient = -target / p + (1 - target) / (1 - p)
        elif self.loss_method == "mae":
            loss = np.sum(np.abs(target - predicted)) / samples
            gradient = np.sign(predicted - target)
        else:  # "mse"
            loss = 0.5 * np.sum(np.square(target - predicted)) / samples
            gradient = predicted - target
        return loss, gradient

    def model_training(self, data=None, labels=None, losstype="mse", learning_rate=0.01, epoch=1, verbose=True):
        """Train the network with full-batch gradient descent.

        Args:
            data: array-like of shape ``(samples, variables)``.
            labels: array-like of shape ``(samples, outputs)``; a 1-D vector is
                treated as a single output column.
            losstype: ``"mse"``, ``"mae"`` or ``"mle"``. Any other value prints
                a message and returns without training.
            learning_rate: gradient descent step size.
            epoch: number of passes over the data.
            verbose: when True (default) print the training trace.

        Raises:
            ValueError: if data or labels are missing, if the number of
                variables differs from the configured input dimension, or if
                the labels do not match the data / output layer shape.
        """
        if data is None or labels is None:
            raise ValueError("please provide sample data in data=X, labels=Y form in model training")
        if self.layer_count == 0:
            raise ValueError("no layers configured, call model_add_layer first")

        samples = np.atleast_2d(np.asarray(data, dtype=float))
        targets = np.asarray(labels)
        if targets.ndim < 2:
            targets = targets.reshape(-1, 1)
        if samples.shape[1] != self.m:
            raise ValueError(
                "reconfigure layer required as data dimension %s did not match with layer input dimension %s"
                % (samples.shape, (self.n, self.m))
            )
        if targets.shape[0] != samples.shape[0]:
            raise ValueError(
                "labels have %d rows but data has %d rows" % (targets.shape[0], samples.shape[0])
            )

        self.X = samples
        self.Y = targets
        # The batch size is taken from the data; it does not constrain the layers.
        self.n = samples.shape[0]
        self.loss_method = losstype
        self.learning_rate = learning_rate
        self.epoch = epoch

        if self.loss_method not in ("mle", "mae", "mse"):
            print("loss type : ", self.loss_method, "is undefined, use predefine loss methods - mae, mle or mse")
            return

        last = self.layer_count - 1
        # The losses are means over samples; the 1/n factor is applied here once.
        step = self.learning_rate / self.n
        for i in range(self.epoch):
            if verbose:
                print("************************************************")
                print("training epoch: ", i)
                print("************************************************")
                print("input dim: ", (self.n, self.m), ", input data: \n", self.X)
                print("label dim: ", self.Y.shape, ", labels: \n", self.Y)

            # feed forward output at last layer (output layer)
            self.YP = self.model_feed_forward(self.X, verbose=verbose)
            if self.YP is None:
                # An undefined activation was already reported by the forward pass.
                return
            if self.YP.shape != self.Y.shape:
                raise ValueError(
                    "label dimension %s did not match output layer dimension %s"
                    % (self.Y.shape, self.YP.shape)
                )

            self.training_loss, self.training_derivative = self._loss_and_gradient(self.Y, self.YP)
            if verbose:
                print("===============================================")
                print(self.loss_method, " - training loss: ", self.training_loss)
                print("===============================================")

            # Back-propagate from the last layer to the first. All deltas are
            # computed before any weight changes so they use the forward-pass weights.
            for layer in range(last, -1, -1):
                if layer == last:
                    upstream = self.training_derivative
                else:
                    upstream = np.dot(self.layer_derivative[layer + 1], self.layer_weights[layer + 1].T)
                self.layer_derivative[layer] = upstream * self.layer_derivative[layer]

            # Weight and bias update at each layer: from last layer to first.
            for layer in range(last, -1, -1):
                layer_input = self.X if layer == 0 else self.layer_output[layer - 1]
                delta = self.layer_derivative[layer]
                if verbose:
                    print("old weight: \n", self.layer_weights[layer])
                    print("old bias: \n", self.layer_bias[layer])
                self.layer_weights[layer] = self.layer_weights[layer] - step * np.dot(layer_input.T, delta)
                # Sum over samples only, so each unit keeps its own bias gradient.
                self.layer_bias[layer] = self.layer_bias[layer] - step * np.sum(delta, axis=0, keepdims=True)
                if verbose:
                    print("new weight: \n", self.layer_weights[layer])
                    print("new bias: \n", self.layer_bias[layer])
                    print("layer: ", layer, " weight and bias updated")
                    print("===============================================")

Train model using training data and labels

In [9]:
# Build a small random data set: uniform features in [0, 1) and binary labels.
number_of_samples, number_of_variables = 5, 4
inputs = np.random.rand(number_of_samples, number_of_variables)
labels = np.random.randint(0, 2, size=(number_of_samples, 1))

# Show every sample next to its label.
for row, label in zip(inputs, labels):
    print("input data: ", row, "label: ", label)

input data:  [0.54809226 0.54372535 0.80092463 0.81469276] label:  [1]
input data:  [0.22277819 0.51712384 0.4016781  0.08952102] label:  [0]
input data:  [0.98859575 0.34532742 0.16519472 0.38896186] label:  [0]
input data:  [0.86346987 0.7173575  0.83334805 0.40107228] label:  [1]
input data:  [0.55170577 0.00600045 0.09947874 0.93172151] label:  [1]


In [12]:
# Four layers: tanh -> relu -> relu -> tanh, with a single output unit.
layer_spec = [(2, "tanh"), (2, "relu"), (2, "relu"), (1, "tanh")]

model = CustomNeuralNetwork()
model.model_add_layer((5, 4), layer_spec)
# Two epochs of batch gradient descent with the "mle" loss.
model.model_training(
    data=inputs,
    labels=labels,
    losstype="mle",
    learning_rate=0.1,
    epoch=2,
)

total layer count:  4
layers:  [(2, 'tanh'), (2, 'relu'), (2, 'relu'), (1, 'tanh')]
************************************************
training epoch:  0
************************************************
input dim:  (5, 4) , input data: 
 [[0.54809226 0.54372535 0.80092463 0.81469276]
 [0.22277819 0.51712384 0.4016781  0.08952102]
 [0.98859575 0.34532742 0.16519472 0.38896186]
 [0.86346987 0.7173575  0.83334805 0.40107228]
 [0.55170577 0.00600045 0.09947874 0.93172151]]
label dim:  (5, 1) , labels: 
 [[1]
 [0]
 [0]
 [1]
 [1]]
input data: 
 [[0.54809226 0.54372535 0.80092463 0.81469276]
 [0.22277819 0.51712384 0.4016781  0.08952102]
 [0.98859575 0.34532742 0.16519472 0.38896186]
 [0.86346987 0.7173575  0.83334805 0.40107228]
 [0.55170577 0.00600045 0.09947874 0.93172151]]
-------------------------------------------------
layer:  1  of  4
layer sum: 
 [[1.63246719 1.82893736]
 [0.75224142 0.89268844]
 [1.30077738 1.61531701]
 [1.65051489 1.90571426]
 [1.23941911 1.47067329]]
layer output: 


In [13]:
# Predict the label of one random sample with four variables.
example = np.random.rand(4)
print("input data: ", example)
# The output of the last layer is thresholded at 0 to obtain a 0/1 label.
prediction = model.model_feed_forward(example)
output = (prediction > 0).astype(int)
print("label: ", output)

input data:  [0.22473111 0.38303981 0.27818426 0.49010033]
input data: 
 [0.22473111 0.38303981 0.27818426 0.49010033]
-------------------------------------------------
layer:  1  of  4
layer sum: 
 [[0.82715186 0.99019288]]
layer output: 
 [[0.67894373 0.75744456]]
-------------------------------------------------
layer:  2  of  4
layer sum: 
 [[0.8902617  1.63693712]]
layer output: 
 [[0.8902617  1.63693712]]
-------------------------------------------------
layer:  3  of  4
layer sum: 
 [[1.34643398 1.76311124]]
layer output: 
 [[1.34643398 1.76311124]]
-------------------------------------------------
layer:  4  of  4
layer sum: 
 [[1.16043095]]
layer output: 
 [[0.82118027]]
label:  [[1]]
