<a href="https://colab.research.google.com/github/sagalpreet/Deep-Neural-Network/blob/master/psinet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

'''
Things to add later: asserts in the proper places to guard against invalid user input.
'''

# The number of layers includes both the input layer and the output layer.

class psinet:
    """Fully connected feed-forward neural network trained by gradient descent.

    Layers are numbered from 0 (the input layer) to ``number_of_layers - 1``
    (the output layer). Data is laid out column-wise: every array handed to
    ``train``, ``evaluate`` or ``test`` holds one example per column.

    The module-level helpers ``A``, ``I``, ``C`` and ``dC`` supply the
    activation functions, their derivatives, the cost and its derivative.
    """

    def __init__(self, number_of_layers, list_of_number_of_nodes_in_each_layer, act, lf):
        """Define the architecture and randomly initialise the parameters.

        Args:
            number_of_layers: Total number of layers, counting both the input
                layer and the output layer.
            list_of_number_of_nodes_in_each_layer: Node count per layer;
                index 0 is the input layer.
            act: Names of the activation functions, one per non-input layer.
                ``act[i - 1]`` is applied to layer ``i``.
            lf: Name of the loss function, forwarded to ``C`` and ``dC``.
        """
        self.num_l = number_of_layers
        self.num_n = list_of_number_of_nodes_in_each_layer
        # Maps 'w<i>' / 'b<i>' to the weight matrix / bias column of layer i.
        self.parameters = {}
        # Maps a layer index to the activations of the latest forward pass.
        # Filled lazily because the batch size is only known at call time.
        self.a = {}
        self.act = act
        self.lf = lf

        # Weights have shape (nodes in layer i, nodes in layer i-1) and biases
        # have shape (nodes in layer i, 1); both are drawn uniformly from [0, 1).
        for i in range(1, self.num_l):
            self.parameters['w'+str(i)] = np.random.random((self.num_n[i], self.num_n[i-1]))
            self.parameters['b'+str(i)] = np.random.random((self.num_n[i], 1))

    def _forward(self, x):
        """Run a forward pass on ``x``, cache every layer in ``self.a``, return the output."""
        if x.ndim < 2:
            raise ValueError(
                "x must have shape (num_input_nodes, num_examples), got {!r}".format(x.shape)
            )

        self.a[0] = x
        for i in range(1, self.num_l):
            z = (self.parameters['w'+str(i)] @ self.a[i-1]) + self.parameters['b'+str(i)]
            # A returns the activated value of z for the named activation.
            self.a[i] = A(self.act[i-1], z)

        return self.a[self.num_l-1]

    def train(self, x, y, alpha):
        """Perform one gradient-descent step on the batch ``(x, y)``.

        Args:
            x: Input array with one example per column.
            y: Target array, compared element-wise with the network output.
            alpha: Learning rate.

        Returns:
            The cost reported by ``C`` for the predictions made *before* the
            parameters were updated.

        Raises:
            ValueError: If ``x`` has fewer than two dimensions.
        """
        output = self._forward(x)

        # Base case of the backward recursion: derivative of the cost with
        # respect to the output layer activations.
        da = dC(self.lf, output, y)

        for i in range(self.num_l-1, 0, -1):
            w_key = "w"+str(i)
            b_key = "b"+str(i)

            # I gives da/dz expressed in terms of a (same shape as a).
            dz = da * I(self.act[i-1], self.a[i])
            # Gradients are summed (not averaged) over the examples in the batch.
            db = np.sum(dz, axis=1, keepdims=True)
            dw = dz @ (self.a[i-1].T)

            # Propagate the error to the previous layer through the weights
            # that were used in the forward pass, i.e. before updating them.
            da = (self.parameters[w_key].T) @ dz

            # Gradient-descent update.
            self.parameters[w_key] = self.parameters[w_key] - (alpha * dw)
            self.parameters[b_key] = self.parameters[b_key] - (alpha * db)

        return C(self.lf, output, y)

    def evaluate(self, x):
        """Return the output layer activations for the inputs ``x``.

        Args:
            x: Input array with one example per column.

        Returns:
            The activations of the last layer, one column per example.

        Raises:
            ValueError: If ``x`` has fewer than two dimensions.
        """
        return self._forward(x)

    def test(self, x, y):
        """Print the cost and accuracy of the network on ``(x, y)``.

        Args:
            x: Input array with one example per column.
            y: Target array, compared element-wise with the predictions.

        Returns:
            The accuracy in percent: the share of output entries that are
            exactly equal to the corresponding entry of ``y``.
        """
        predicted = self.evaluate(x)

        cost = C(self.lf, predicted, y)
        print("Cost: ",cost)

        # NOTE(review): exact equality on raw activations is rarely true for
        # sigmoid outputs; callers may want to threshold predictions first.
        accuracy = 100*(np.sum(predicted==y))/y.size
        print("Accuracy: ", accuracy)

        return accuracy
        


# A is the function which returns the activated value of z given the type of activation function and z as input
def A(f, z):
    """Apply the activation function named ``f`` element-wise to ``z``.

    Args:
        f: Name of the activation, either "sigmoid" or "relu".
        z: Pre-activation values (NumPy array or scalar).

    Returns:
        The activated values.

    Raises:
        ValueError: If ``f`` is not a supported activation name.
    """
    if f == "sigmoid":
        return 1/(1+np.exp(-z))
    if f == "relu":
        # (z + |z|) / 2 equals z for positive entries and 0 otherwise.
        return (z+np.abs(z))/2
    # Fail loudly instead of silently returning None for an unknown name.
    raise ValueError("unsupported activation function: {!r}".format(f))
        

# dC is a function that returns the partial derivative of the cost function wrt to the nodes in the output layer.
def dC(lf, yhat, y):
    """Return the partial derivative of the cost with respect to ``yhat``.

    Args:
        lf: Name of the loss: "mse" (squared error) or "cel" (cross entropy).
        yhat: Predicted values (output layer activations).
        y: Target values, broadcastable against ``yhat``.

    Returns:
        An array of the same shape as ``yhat - y`` holding the element-wise
        derivative.

    Raises:
        ValueError: If ``lf`` is not a supported loss name.
    """
    if lf == "mse":  # squared error
        return 2*(yhat-y)
    if lf == "cel":  # cross entropy loss
        # Keep predictions strictly inside (0, 1) so the denominator cannot
        # become zero when an output saturates at exactly 0 or 1.
        eps = 1e-12
        yhat = np.clip(np.asarray(yhat, dtype=np.float64), eps, 1-eps)
        return (yhat-y)/(yhat*(1-yhat))
    # Fail loudly instead of silently returning None for an unknown name.
    raise ValueError("unsupported loss function: {!r}".format(lf))


def C(lf, yhat, y):
    """Return the cost of the predictions ``yhat`` against the targets ``y``.

    The per-element losses are summed along axis 1 (kept as a dimension), so
    for 2-D inputs the result has one row per output node.

    Args:
        lf: Name of the loss: "mse" (squared error) or "cel" (cross entropy).
        yhat: Predicted values (output layer activations).
        y: Target values as a NumPy array broadcastable against ``yhat``.

    Returns:
        The summed loss with axis 1 reduced to length 1.

    Raises:
        ValueError: If ``lf`` is not a supported loss name.
    """
    if lf == "mse":  # squared error, summed (not averaged) along axis 1
        return np.sum(np.square(yhat-y), axis=1, keepdims=True)
    if lf == "cel":  # cross entropy loss
        # Keep predictions strictly inside (0, 1) so log() never sees 0,
        # which would otherwise produce inf or nan costs.
        eps = 1e-12
        yhat = np.clip(np.asarray(yhat, dtype=np.float64), eps, 1-eps)
        return np.sum(-y*np.log(yhat)-(1-y)*np.log(1-yhat), axis=1, keepdims=True)
    # Fail loudly instead of silently returning None for an unknown name.
    raise ValueError("unsupported loss function: {!r}".format(lf))
    

# derivative of a wrt z in terms of a, nd array of same dimensions as that of a
def I(f, a):
    """Return the derivative of the activation ``f`` expressed in terms of ``a``.

    Args:
        f: Name of the activation, either "sigmoid" or "relu".
        a: Activated values, i.e. the output of the activation function.

    Returns:
        An array with the same shape as ``a`` holding da/dz element-wise.

    Raises:
        ValueError: If ``f`` is not a supported activation name.
    """
    if f == "sigmoid":
        return a*(1-a)
    if f == "relu":
        # 1 where the unit is active (a > 0), 0 elsewhere.
        return 1*(a>0)
    # Fail loudly instead of silently returning None for an unknown name.
    raise ValueError("unsupported activation function: {!r}".format(f))