In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

##### X -> [number of hours you study, number of hours you sleep]
##### y -> [Score in a test]

In [2]:
X = np.array(([3, 5], [5, 1], [10, 2]), dtype=float)
y = np.array(([75], [82], [93]), dtype=float)

#### Scale the inputs

In [3]:
X = X/np.amax(X, axis=0)
y = y/100 #maximum test score is 100

#### Forward Propogation

In [4]:
class Neural_Network(object):
    def __init__(self):
        #Define hyperparameters
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        
        #Weights (Parameters)
        self.W1 = np.random.randn(self.inputLayerSize, \
                              self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, \
                              self.outputLayerSize)
    def forward(self, X):
        #Propogate inputs through network
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yhat = self.sigmoid(self.z3)
        return yhat
    
    def sigmoid(self, z):
    #Apply sigmoid activation function to scalar, vector
        return 1/(1+np.exp(-z))
    
    def costFunctionPrime(self, X, y):
        #Compute derivative with respect to W1 and W2
        self.yhat = self.forward(X)

        delta3 = np.multiply(-(y-self.yhat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)
        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)

        return dJdW1, dJdW2

    def sigmoidPrime(self, z):
        #Derivative of Sigmoid function
        return np.exp(-z)/((1+np.exp(-z))**2) 
    
    def costFunction(self, X, y):
        #Compute cost for given X,y, use weights already stored in class.
        self.yHat = self.forward(X)
        J = 0.5*sum((y-self.yHat)**2)
        return J
    
    
    #Helper functions for interacting with other methods/classes
    
    def getParams(self):
        #Get W1 and W2 rolled into vector
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params
    
    def setParams(self, params):
        #Set W1 and W2 using single parameter vector:
        W1_start = 0
        W1_end = self.hiddenLayerSize*self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], \
                            (self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end], \
                            (self.hiddenLayerSize, self.outputLayerSize))
        
    def computeGradients(self, X, y):
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

In [5]:
NN = Neural_Network()

In [6]:
dJdW1, dJdW2 = NN.costFunctionPrime(X, y)

In [7]:
dJdW1

array([[ 0.00679107, -0.01460781,  0.03825656],
       [ 0.0047274 , -0.01202321,  0.04768394]])

In [8]:
dJdW2

array([[-0.18978541],
       [-0.15242721],
       [-0.24517627]])

In [9]:
cost1 = NN.costFunction(X,y)

In [10]:
scalar = 3
NN.W1 = NN.W1 + scalar*dJdW1
NN.W2 = NN.W2 + scalar*dJdW2
cost2 = NN.costFunction(X,y)

In [11]:
print (cost1, cost2) 

[0.55433766] [0.84923474]


In [12]:
dJdW1, dJdW2 = NN.costFunctionPrime(X, y)
NN.W1 = NN.W1 - scalar*dJdW1
NN.W2 = NN.W2 - scalar*dJdW2
cost3 = NN.costFunction(X,y)

In [13]:
cost3

array([0.70384811])

In [14]:
def computeNumericalGradient(N, X, y):
        paramsInitial = N.getParams()
        numgrad = np.zeros(paramsInitial.shape)
        perturb = np.zeros(paramsInitial.shape)
        e = 1e-4

        #test one gradient at a time
        for p in range(len(paramsInitial)):
            #Set perturbation vector
            perturb[p] = e
            #adding epsilon and compute cost function
            N.setParams(paramsInitial + perturb)
            loss2 = N.costFunction(X, y)
            
            #subtracting epsilon and compute cost function
            N.setParams(paramsInitial - perturb)
            loss1 = N.costFunction(X, y)

            #Compute Numerical Gradient
            numgrad[p] = (loss2 - loss1) / (2*e)

            #Return the value we changed to zero:
            perturb[p] = 0
            
        #Return Params to original value:
        N.setParams(paramsInitial)

        return numgrad

In [15]:
numgrad = computeNumericalGradient(NN, X, y)
grad = NN.computeGradients(X, y)

In [16]:
numgrad

array([ 0.0145585 , -0.00332531,  0.03755512,  0.01084566, -0.00292524,
        0.05017884, -0.15716647, -0.12195872, -0.20127383])

In [17]:
grad

array([ 0.0145585 , -0.00332531,  0.03755512,  0.01084566, -0.00292524,
        0.05017884, -0.15716647, -0.12195872, -0.20127383])