# Neural network from scratch

In [1]:
import numpy as np
import pandas as pd
import scipy  as scp
from scipy import optimize

In [2]:
class neural_network(object):
    def __init__(self,Lambda):
        #Initialize all hyperparameters
        self.inputLayerSize=2
        self.hiddenLayerSize=3
        self.outputLayerSize=1

        # Initialize weights 
        self.w_1 = np.random.randn(self.inputLayerSize,self.hiddenLayerSize)
        self.w_2 = np.random.randn(self.hiddenLayerSize,self.outputLayerSize)
        
        #initialize biases
        self.b_1 = np.random.uniform(self.hiddenLayerSize)
        self.b_2 = np.random.uniform(self.outputLayerSize)

        self.Lambda = Lambda

    def sigmoid(self,z):
        # Sigmoid activation function
        return 1/(1+np.exp(-z))
    
    def sigmoid_diff(self,z):
        # Gradient of Sigmoid function
        return np.exp(-z)/((1+np.exp(-z))**2)
    
    def softmax(self,z):
        #Softmax activation function
        return z/np.sum(z,axis=0)

    def relu(self,z):
        # ReLu activation function
        relu = list()
        for row in z:
            rr=[]
            for col in row:
                if col<=0:
                    rr.append(0)
                else:
                    rr.append(col)
            relu.append(rr)
        relu=np.array(relu)    
        return relu
    
    def relu__diff(self,z):
        # ReLu gradient activation function
        self.relu_diff = list()
        for row in z:
            rr=[]
            for col in row:
                if col<=0:
                    rr.append(0)
                else:
                    rr.append(1)
            self.relu_diff.append(rr)
        self.relu_diff=np.array(relu)    
        return self.relu_diff

    def forward_nn(self,X):
        # Forward pass of the neural network
        self.z_2 = np.dot(X,self.w_1) #+ self.b_1
        self.a_2 = self.sigmoid(self.z_2)
        self.z_3 = np.dot(self.a_2,self.w_2) #+ self.b_2
        y_hat = self.sigmoid(self.z_3)
        
        return y_hat
    
    def cost_function(self,X,y):
        # Computing the cost of the given X and y
        self.y_hat = self.forward_nn(X)
        J = sum((y-self.y_hat)**2)/X.shape[0] #\
                # + (self.Lambda/2)*(sum(self.w_1**2+self.w_2**2))
        
        return J
    
    def cost_function_diff(self,X,y):
        # compute the derivative w.r.t. w and w1 for a given X and y
        self.y_hat = self.forward_nn(X)

        delta3  = np.multiply(-(y-self.y_hat),self.sigmoid_diff(self.z_3))
        dJdW2 = np.dot(self.a_2.T, delta3)/X.shape[0] #+ self.Lambda*self.w_2

        delta2 = np.dot(delta3,self.w_2.T)*self.sigmoid_diff(self.z_2)        
        dJdW1 = np.dot(X.T,delta2)/X.shape[0] #+ self.Lambda*self.w_1

        # print(dJdW1,dJdW2)
        return dJdW1,dJdW2

    def backward_nn(self,X,y):
        # Compute the Gradients
        dJdW1 , dJdW2 = self.cost_function_diff(X,y)
        return np.concatenate((dJdW1.ravel(),dJdW2.ravel()))
    
    # Helper functions - get/set functions
    def getParams(self):
        # Get W1 and W2 using single parameter vector
        params = np.concatenate((self.w_1.ravel(),self.w_2.ravel()))
        return params
    
    def setParams(self, params):
        # Set W1 and W2 using single parameter vector
        W1_start = 0
        W1_end = self.hiddenLayerSize*self.inputLayerSize
        
        self.w_1 = np.reshape(params[W1_start:W1_end],(self.inputLayerSize,self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.inputLayerSize
        self.w_2 = np.reshape(params[W1_end:W2_end],(self.hiddenLayerSize,self.outputLayerSize))



In [3]:
# training Class
class trainer(object):
    def __init__(self,nn):
        # Make reference to the Neural Network
        self.N = nn 
    
    def callbacks(self,params):
        self.N.setParams(params)
        self.J.append(self.N.cost_function(self.X, self.y))
        self.J.append(self.N.cost_function(self.testX, self.testY))
    
    def costFunctionWrapper(self, params, X, y):
        self.N.setParams(params)
        cost = self.N.cost_function(X,y)
        gradient = self.N.backward_nn(X,y)
        #print(cost,gradient)
        return cost , gradient
    
    def train(self,trainX,trainY, testX, testY):
        # Make an internal variable for the callback function
        self.X = trainX
        self.y = trainY

        self.testX = testX
        self.testY = testY

        # Empty lists to store costs
        self.J = []
        self.testJ =[]

        params_0 = self.N.getParams()

        options = {'maxiter': 300, 'disp': True}

        _res = optimize.minimize(self.costFunctionWrapper, params_0, jac = True, \
                                 method = 'BFGS', args=(trainX,trainY), options = options, \
                                 callback=self.callbacks)
        
        self.N.setParams(_res.x)
        self.optimizationResults = _res



In [4]:
Lambda = 0.0001
if __name__ == '__main__':
    NN = neural_network(Lambda=Lambda)
    trainX = np.array(([[4,7],[6,2],[10,5]]), dtype=float)
    trainy = np.array(([51],[76],[89]), dtype=float)

    #trainX = np.random.randint(low=1,high=12,size=(10000,2))
    #trainy = np.random.randint(low=50,high=100,size=(10000,1))

    print("Training X:\n",trainX)

    testX = np.array(([[4,5.5],[3,1],[10,3]]), dtype=float)
    testy = np.array(([70],[45],[85]), dtype=float)

    trainX = trainX/np.max(trainX,axis=0)
    
    trainy = trainy/100
    
    testX = testX/np.max(testX,axis=0)
    testY = testy/100

    T = trainer(NN)
    T.train(trainX,trainy,testX,testy)

    print("Train input:\n",trainX)
    print("Expected training output y:\n", trainy)
    
    y_hat=NN.forward_nn(trainX)
    print("predicted Train output:\n",y_hat)

    print("TEST input:\n",testX)
    print("Expected output y:\n", testy)
    
    y_hat=NN.forward_nn(testX)
    print("predicted output:\n",y_hat)

Training X:
 [[ 4.  7.]
 [ 6.  2.]
 [10.  5.]]
Optimization terminated successfully.
         Current function value: 0.000033
         Iterations: 37
         Function evaluations: 40
         Gradient evaluations: 40
Train input:
 [[0.4        1.        ]
 [0.6        0.28571429]
 [1.         0.71428571]]
Expected training output y:
 [[0.51]
 [0.76]
 [0.89]]
predicted Train output:
 [[0.50000001]
 [0.75996061]
 [0.88991619]]
TEST input:
 [[0.4        1.        ]
 [0.3        0.18181818]
 [1.         0.54545455]]
Expected output y:
 [[70.]
 [45.]
 [85.]]
predicted output:
 [[0.50000001]
 [0.81797441]
 [0.75918426]]
