In [1]:
#Initialize the inputs 

import numpy as np
import matplotlib.pyplot as plot

# Three samples with two features each; one target value per sample.
X = np.array([[2, 8], [3, 7], [10, 5]], dtype=float)
y = np.array([[18], [29], [95]], dtype=float)

# Scale each feature column by its own maximum and the targets by 100.
X /= X.max(axis=0)
y /= 100

Initialize the neural network and run forward propagation with its initial random weights.

In [2]:
from neural_network import NeuralNetwork 
# Build a network with 2 inputs, 3 hidden units and 1 output, then run a single
# forward pass over the normalized inputs (weights are still the initial ones).
nn = NeuralNetwork(input_layer_size=2, hidden_layer_size=3, output_layer_size=1)
yHat = nn.forward(X)
yHat

array([[ 0.74246966],
       [ 0.73690391],
       [ 0.703842  ]])

In [3]:
# Inspect the first weight matrix held by the network (the recorded output is
# 2x3, matching input_layer_size x hidden_layer_size).
nn.weights_1

array([[ 1.64657381, -0.55829147, -2.57905481],
       [-0.59848332, -0.34142671, -0.31638264]])

In [4]:
# Inspect the second weight matrix held by the network (the recorded output is
# 3x1, matching hidden_layer_size x output_layer_size).
nn.weights_2

array([[ 0.66773197],
       [ 0.81037391],
       [ 1.49979044]])

In [5]:
# Show the prediction stored on the network object; it should equal the value
# returned by nn.forward(X) above.
nn.yHat

array([[ 0.74246966],
       [ 0.73690391],
       [ 0.703842  ]])

In [6]:
# Normalized targets, shown for comparison with the predictions.
y

array([[ 0.18],
       [ 0.29],
       [ 0.95]])

In [7]:
# Recompute the network output by hand from nn.hidden_layer and nn.weights_2;
# the result should match nn.yHat.
nn.eval_function(nn.hidden_layer.dot(nn.weights_2))

array([[ 0.74246966],
       [ 0.73690391],
       [ 0.703842  ]])

In [8]:
# Element-wise product of the negated residual (y - yHat) and the activation
# derivative evaluated at nn.z3.
delta3 = -(y - nn.yHat) * nn.eval_prime_function(nn.z3)

In [9]:
# Project delta3 back through the transposed hidden layer to get a matrix with
# the same shape as nn.weights_2 (3x1 in the recorded output).
dJdW2 = nn.hidden_layer.T.dot(delta3)
dJdW2

array([[ 0.04917643],
       [ 0.05897821],
       [ 0.05205164]])

In [10]:
# Record the cost at the current weights so it can be compared with the cost
# after a manual weight update.
cost1 = nn.cost(X, y)
cost1

array([ 0.28834449])

In [11]:
# One manual descent step: move both weight matrices against their gradients,
# then re-evaluate the cost. Note that re-running this cell takes another step.
scalar = 3.0  # step size applied to both gradients
dJdW1, dJdW2 = nn.cost_function_prime(X, y)
nn.weights_1 = nn.weights_1 - scalar * dJdW1
nn.weights_2 = nn.weights_2 - scalar * dJdW2
cost2 = nn.cost(X, y)

In [12]:
# Cost before and after the manual update; the second value should be lower.
cost1, cost2

(array([ 0.28834449]), array([ 0.25233364]))

Check the correctness of the implemented gradient by comparing it against a numerical gradient computed from the definition of a derivative (central differences).

In [13]:
def compute_num_gradient(N, X, y):
    """Estimate the gradient of ``N.cost(X, y)`` with respect to the weights.

    Each weight is perturbed by ``+e`` and ``-e`` in turn and the slope is
    taken from the central difference ``(cost(w + e) - cost(w - e)) / (2e)``.

    Parameters
    ----------
    N : object
        Model exposing ``get_weights()``, ``set_weights(w)`` and ``cost(X, y)``.
        ``get_weights()`` is expected to return a flat 1-D vector, since the
        loop perturbs one index of that vector at a time.
    X, y :
        Passed through unchanged to ``N.cost``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as the weight vector holding one
        finite-difference slope per weight.

    Notes
    -----
    The model's weights are restored to their initial values before returning,
    including when ``cost`` or ``set_weights`` raises.
    """
    # Copy so the baseline cannot be altered if the model reuses the same buffer.
    initial_weights = np.array(N.get_weights())
    numerical_gradient = np.zeros(initial_weights.shape)
    perturb = np.zeros(initial_weights.shape)
    e = 1e-4

    try:
        # `range` works on both Python 2 and 3 (`xrange` is Python 2 only).
        for p in range(len(initial_weights)):
            perturb[p] = e
            N.set_weights(initial_weights + perturb)
            loss1 = N.cost(X, y)

            N.set_weights(initial_weights - perturb)
            loss2 = N.cost(X, y)

            # The cost may come back as a 1-element array; extract the scalar
            # explicitly instead of relying on implicit array-to-scalar conversion.
            numerical_gradient[p] = np.asarray((loss1 - loss2) / (2 * e)).item()

            perturb[p] = 0
    finally:
        # Always leave the model as we found it.
        N.set_weights(initial_weights)

    return numerical_gradient

In [14]:
# Gradient as computed by the network's own implementation, followed by the
# finite-difference estimate from compute_num_gradient for the same data.
# NOTE(review): both calls go through nn, so keep this order unless
# compute_gradients is confirmed not to depend on cached forward-pass state.
implemented_gradient = nn.compute_gradients(X, y)
computed_gradient = compute_num_gradient(nn, X, y)


In [15]:
# Gradient reported by the network's implementation (one entry per weight).
implemented_gradient

array([ 0.00020108, -0.00063402,  0.00227391,  0.00650987,  0.00716483,
        0.01463402,  0.01187516,  0.01730082,  0.01540291])

In [16]:
# Finite-difference gradient; should closely match implemented_gradient.
computed_gradient

array([ 0.00011109, -0.00173446,  0.00760449,  0.01973311,  0.02162361,
        0.0440449 ,  0.03546942,  0.05171244,  0.04580564])

In [17]:
# Relative difference between the two gradients: norm of their difference over
# the norm of their sum. A matching implementation yields a value many orders
# of magnitude below 1.
# NOTE(review): the output recorded with this notebook is ~0.5, which is not
# small -- the implemented and numerical gradients disagree; investigate.
(
    np.linalg.norm(implemented_gradient - computed_gradient)
    / np.linalg.norm(implemented_gradient + computed_gradient)
)

0.49936846351771408

In [18]:
trainX = np.array(([2, 8],[3, 7],[10, 5], [6,1.5]), dtype=float)
trainY = np.array(([18], [29], [95], [76]), dtype=float)
testX = np.array(([5, 5], [3,1], [9,3], [6, 2]), dtype=float)
testY = np.array(([60], [22], [85], [75]), dtype=float)

# Normalize. The per-feature maxima are taken from the raw training data once,
# before trainX is rescaled, so the test inputs are scaled by the same factors.
# (Taking np.amax(trainX) after trainX is normalized yields all ones and would
# leave testX unscaled.)
trainX_max = np.amax(trainX, axis=0)
trainX = trainX / trainX_max
trainY = trainY / 100
testX = testX / trainX_max
testY = testY / 100



In [19]:
# Display the normalized training inputs and targets.
trainX, trainY

(array([[ 0.3,  1. ],
        [ 0.5,  0.2],
        [ 1. ,  0.4],
        [ 0.6,  0.3]]), array([[ 0.75],
        [ 0.82],
        [ 0.93],
        [ 0.7 ]]))

In [20]:
# Fit the network on the training split; the test split is passed along as well
# (presumably so train() can track test cost -- confirm in NeuralNetwork.train).
nn.train(trainX, trainY, testX, testY)

Optimization terminated successfully.
         Current function value: 0.002618
         Iterations: 103
         Function evaluations: 105
         Gradient evaluations: 105
