# Simple Python Neural Network Example

This example comes from [Andrew Trask's](http://iamtrask.github.io/2015/07/12/basic-python-network/) blog.

We start with the original source, and then refactor it to better reflect the network structure and modularity
of the layers.

First we set up a sample problem: training a network to implement this boolean function of 3 inputs:

In [39]:
import numpy as np

# Training inputs: one sample per row.  The first two columns cover every
# 0/1 combination; the third column is 1 in every sample.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])
# Desired outputs as a (4 x 1) column, one row per sample in X.  Each target
# equals the XOR of the first two input columns.
y = np.array([[0], [1], [1], [0]])

print("Input:\n%s" % X)
print("\nOutput:\n%s" % y)

Input:
[[0 0 1]
 [0 1 1]
 [1 0 1]
 [1 1 1]]

Output:
[[0]
 [1]
 [1]
 [0]]


## Here is the initial code as presented: *a 9 line python neural network*.

In [50]:
# Weights drawn uniformly from [-1, 1): syn0 maps the 3 input columns to 4
# hidden nodes, syn1 maps the 4 hidden nodes to the single output.
syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1
for j in range(10000):
    # Forward pass: sigmoid of the weighted sums, one layer after the other.
    l1 = 1/(1+np.exp(-(np.dot(X,syn0))))
    l2 = 1/(1+np.exp(-(np.dot(l1,syn1))))
    # Output error (y - l2) scaled by the sigmoid slope l2*(1-l2).
    l2_delta = (y - l2)*(l2*(1-l2))
    # Send the output delta back through syn1, scaled by the hidden slope.
    l1_delta = l2_delta.dot(syn1.T) * (l1 * (1-l1))
    # Adjust both weight matrices; both deltas above were computed before
    # syn1 is modified here.
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)

# l1 and l2 hold the activations computed in the final iteration.
print("Output After Training:")
print(l1)
print(l2)

Output After Training:
[[  8.55114497e-01   1.80588202e-01   7.27012227e-01   6.03414158e-01]
 [  9.99846808e-01   9.37900800e-01   9.75790495e-01   2.46398533e-02]
 [  4.26092650e-02   2.83578519e-04   9.79447142e-01   2.18868093e-02]
 [  9.80086286e-01   1.90686030e-02   9.98615422e-01   3.71387120e-04]]
[[ 0.00521183]
 [ 0.98918934]
 [ 0.98715071]
 [ 0.01437938]]


## Parameterize the number of nodes in the hidden layer:

In [102]:
# Layer sizes: input features, hidden nodes, output nodes.
inputs = 3
hidden = 6
outputs = 1

# Weight matrices with entries drawn uniformly from [-1, 1).
syn0 = 2 * np.random.random((inputs, hidden)) - 1
syn1 = 2 * np.random.random((hidden, outputs)) - 1

for j in range(10000):
    # Forward pass: sigmoid of each layer's weighted sums.
    l1 = 1 / (1 + np.exp(-X.dot(syn0)))
    l2 = 1 / (1 + np.exp(-l1.dot(syn1)))

    # Backward pass: error scaled by the sigmoid slope at each layer.
    l2_delta = (y - l2) * (l2 * (1 - l2))
    l1_delta = np.dot(l2_delta, syn1.T) * (l1 * (1 - l1))

    # Weight updates; both deltas were computed before syn1 changes.
    syn1 += np.dot(l1.T, l2_delta)
    syn0 += np.dot(X.T, l1_delta)

print("Output After Training:")
print(l1)
print(l2)

Output After Training:
[[ 0.13145509  0.25543757  0.86541247  0.81829263  0.3741695   0.42095258]
 [ 0.96775253  0.00358605  0.11485183  0.17155396  0.04266259  0.00639713]
 [ 0.9806165   0.84593094  0.99919242  0.99721182  0.66397542  0.84943384]
 [ 0.99990032  0.05446175  0.96148983  0.9426812   0.12837519  0.04758618]]
[[ 0.00562359]
 [ 0.99163764]
 [ 0.99154173]
 [ 0.01014183]]


## Factoring the sigmoid function and its derivative:

In [99]:
def sigmoid(x):
    """
    Return the logistic sigmoid 1 / (1 + e**-x) of x.

    x is passed straight to np.exp, so it may be a scalar or an array; for
    an array the function is applied element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_prime(s):
    """
    Return the slope of the sigmoid, expressed in terms of its output.

    Note that s is the value already returned by sigmoid(x), not x itself;
    the result is s * (1 - s).
    """
    complement = 1 - s
    return s * complement

# Fresh weight matrices with entries drawn uniformly from [-1, 1).
syn0 = 2 * np.random.random((inputs, hidden)) - 1
syn1 = 2 * np.random.random((hidden, outputs)) - 1

for j in range(10000):
    # Forward pass through both layers.
    l1 = sigmoid(X.dot(syn0))
    l2 = sigmoid(l1.dot(syn1))

    # Display errors every 1000 trials
    l2_error = y - l2
    if j % 1000 == 0:
        print(l2_error)

    # Backward pass: error scaled by the sigmoid slope at each layer.
    l2_delta = l2_error * sigmoid_prime(l2)
    l1_delta = np.dot(l2_delta, syn1.T) * sigmoid_prime(l1)

    # Weight updates; both deltas were computed before syn1 changes.
    syn1 += np.dot(l1.T, l2_delta)
    syn0 += np.dot(X.T, l1_delta)

print("Output After Training:")
# Final weights first, then the activations from the last iteration.
for result in (syn0, syn1, l1, l2):
    print(result)

[[-0.53036947]
 [ 0.47419884]
 [ 0.46304225]
 [-0.532378  ]]
[[-0.04664108]
 [ 0.04872763]
 [ 0.0477411 ]
 [-0.03931221]]
[[-0.02652521]
 [ 0.02648729]
 [ 0.02737482]
 [-0.02230832]]
[[-0.02026229]
 [ 0.01982997]
 [ 0.02098207]
 [-0.01705751]]
[[-0.01697022]
 [ 0.01639875]
 [ 0.01760939]
 [-0.01430092]]
[[-0.01487057]
 [ 0.01423898]
 [ 0.01545327]
 [-0.01254304]]
[[-0.01338594]
 [ 0.01272685]
 [ 0.01392612]
 [-0.01129985]]
[[-0.01226629]
 [ 0.01159539]
 [ 0.01277287]
 [-0.01036199]]
[[-0.01138374]
 [ 0.0107093 ]
 [ 0.01186285]
 [-0.00962248]]
[[-0.0106653 ]
 [ 0.00999197]
 [ 0.01112139]
 [-0.00902027]]
Output After Training:
[[ 6.17520303  5.15374032 -3.95911529]
 [-6.27232511 -4.94450224  3.85133366]
 [-3.19630462  2.65492189 -2.17548783]]
[[ 11.12251765]
 [ -6.05455807]
 [  6.1899009 ]]
[[  3.93055436e-02   9.34312781e-01   1.01975300e-01]
 [  7.72343960e-05   9.19901814e-02   8.42351857e-01]
 [  9.51611003e-01   9.99593949e-01   2.16202356e-03]
 [  3.57978314e-02   9.46045267e-01   

## Factor each Layer into a class to be more DRY:

In [84]:
class Layer(object):
    """
    A fully connected n => m layer with an optional sigmoid on its outputs.

    forward() remembers the inputs and outputs it last saw so that a
    following backward() call can compute the weight update from them.
    """

    def __init__(self, n, m, sigmoid=True):
        """
        Create a layer taking n input features and producing m output nodes.

        The (n x m) weight matrix starts with values drawn uniformly from
        [-1, 1).  Column i holds the weights applied to the inputs to form
        output node i as a weighted sum.

        When sigmoid is true (the default) the sigmoid function is applied
        to the weighted sums.
        """
        # Within this method `sigmoid` is the boolean flag, not the
        # module-level sigmoid() function used by forward().
        self.sigmoid = sigmoid
        self.weights = 2 * np.random.random((n, m)) - 1

    def forward(self, inputs):
        """
        Compute the layer outputs for k input samples.

        inputs is a (k x n) matrix with one sample per row.  The result is
        a (k x m) matrix whose rows are the m output node values for the
        corresponding input rows.

        Both inputs and the result are stored on the instance for use by
        backward().
        """
        self.inputs = inputs
        # (k x n) . (n x m) => (k x m)
        weighted_sums = np.dot(inputs, self.weights)
        self.outputs = sigmoid(weighted_sums) if self.sigmoid else weighted_sums
        return self.outputs

    def backward(self, output_errors):
        """
        Propagate error terms from this layer's outputs back to its inputs.

        output_errors is a (k x m) matrix of error terms, one row per
        sample given to the most recent forward() call.

        Returns the (k x n) matrix of error terms for the previous layer.

        As a side effect, this layer's weights are updated in place.
        """
        deltas = output_errors
        if self.sigmoid:
            # Scale by the sigmoid slope at the outputs of the last forward().
            deltas = deltas * sigmoid_prime(self.outputs)
        # Computed before the update below, so it uses the old weights.
        input_errors = deltas.dot(self.weights.T)
        self.weights += self.inputs.T.dot(deltas)
        return input_errors
        

## Use the `Layer` class instances to organize a training session.

In [103]:
# The two weight matrices are now Layer instances that hold their own state.
syn0 = Layer(inputs, hidden)
syn1 = Layer(hidden, outputs)

for j in range(10000):
    # Forward pass: feed the samples through both layers in order.
    l1 = syn0.forward(X)
    l2 = syn1.forward(l1)

    l2_error = y - l2
    # Display errors every 1000 trials
    if not j % 1000:
        print(l2_error)

    # Backward pass in reverse layer order; each call also updates that
    # layer's weights.
    l1_error = syn1.backward(l2_error)
    syn0.backward(l1_error)

print("Output After Training:")
# Final weights first, then the activations from the last iteration.
for result in (syn0.weights, syn1.weights, l1, l2):
    print(result)

[[-0.50498807]
 [ 0.55807692]
 [ 0.38318375]
 [-0.5621711 ]]
[[-0.03335727]
 [ 0.04587461]
 [ 0.04671861]
 [-0.05682241]]
[[-0.01837836]
 [ 0.025517  ]
 [ 0.02738914]
 [-0.03275339]]
[[-0.01376553]
 [ 0.01923011]
 [ 0.02111858]
 [-0.02513282]]
[[-0.01137163]
 [ 0.01595154]
 [ 0.01777302]
 [-0.02109465]]
[[-0.00986068]
 [ 0.01387434]
 [ 0.01562106]
 [-0.01850654]]
[[-0.00880152]
 [ 0.01241387]
 [ 0.01409084]
 [-0.01667057]]
[[-0.00800859]
 [ 0.01131782]
 [ 0.01293208]
 [-0.01528274]]
[[-0.00738753]
 [ 0.0104576 ]
 [ 0.01201585]
 [-0.01418691]]
[[-0.0068848 ]
 [ 0.00976004]
 [ 0.01126814]
 [-0.01329368]]
Output After Training:
[[-4.60854275  5.49316519 -1.33387205  3.47998052  1.43228555 -5.51970359]
 [-4.93891888 -6.96242521  0.48092396 -1.075712   -0.11942778  3.41726649]
 [ 1.29813543 -2.49734614 -0.75119578  0.06232811 -0.02589833 -1.39597961]]
[[ -5.95787557]
 [ 10.27050811]
 [  0.96643355]
 [ -3.93578204]
 [ -1.65376938]
 [  7.05509592]]
[[  7.85519853e-01   7.60449167e-02   3.2056

## Define a Network class to train a multi-layer network.

In [126]:
class Network(object):
    """A stack of Layer objects trained together by back-propagation."""

    def __init__(self, *nodes):
        """
        Create a multi-layer network.  Arguments are the number of nodes in
        each layer, from input through output.

        One Layer is built for each adjacent pair of node counts.
        """
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(nodes, nodes[1:])]

    def train(self, inputs, outputs, iterations=10000):
        """
        Call train_once() `iterations` times on the same inputs and outputs.

        The output errors are printed on every 1000th iteration, starting
        with the first.
        """
        for step in range(iterations):
            errors = self.train_once(inputs, outputs)
            if step % 1000 == 0:
                print(errors)

    def train_once(self, inputs, outputs):
        """
        Run one forward pass followed by one backward pass.

        Returns the output errors (outputs minus the network's result)
        measured before the weights were updated.
        """
        # Run forward
        predictions = self.run(inputs)

        # Back-propagate errors, last layer first
        output_errors = outputs - predictions
        errors = output_errors
        for layer in reversed(self.layers):
            errors = layer.backward(errors)

        return output_errors

    def run(self, inputs):
        """Feed inputs through every layer in order and return the result."""
        activations = inputs
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def __str__(self):
        """Return each layer's weights under a "Layer <index>:" heading."""
        sections = []
        for index, layer in enumerate(self.layers):
            sections.append("Layer %d:\n%s" % (index, layer.weights))
        return "\n\n".join(sections)

In [127]:
# Build a network with the layer sizes defined earlier and train it on the
# sample problem; train() prints the output errors as it goes.
n = Network(inputs, hidden, outputs)
n.train(X, y)

# Show the trained network's result for X, then its per-layer weights.
predictions = n.run(X)
print(predictions)
print(str(n))

[[-0.31877264]
 [ 0.62053412]
 [ 0.70381587]
 [-0.35101251]]
[[-0.03794456]
 [ 0.03687753]
 [ 0.04630621]
 [-0.04704316]]
[[-0.02208775]
 [ 0.0215292 ]
 [ 0.026338  ]
 [-0.02678495]]
[[-0.01676477]
 [ 0.01644009]
 [ 0.0197934 ]
 [-0.0201897 ]]
[[-0.01393031]
 [ 0.01373327]
 [ 0.01634623]
 [-0.0167206 ]]
[[-0.01211886]
 [ 0.01200168]
 [ 0.01415832]
 [-0.01451897]]
[[-0.01083975]
 [ 0.01077697]
 [ 0.012621  ]
 [-0.0129714 ]]
[[-0.00987758]
 [ 0.00985406]
 [ 0.011469  ]
 [-0.01181104]]
[[-0.00912145]
 [ 0.00912747]
 [ 0.01056649]
 [-0.01090134]]
[[-0.00850788]
 [ 0.0085368 ]
 [ 0.00983602]
 [-0.01016452]]
[[ 0.00799761]
 [ 0.99195527]
 [ 0.99077013]
 [ 0.00955264]]
Layer 0:
[[ 0.95686602 -6.05247567  5.473298    1.75891924 -3.07118592  1.25046143]
 [-2.72781871 -5.64285032 -4.50652517 -2.88898525 -3.7403061  -2.54093422]
 [-0.09123591  2.31909978  2.4112387  -0.85533304  5.18432584 -0.50637394]]

Layer 1:
[[ 3.05264146]
 [-9.18576988]
 [-7.10714627]
 [ 3.0159911 ]
 [ 6.8802426 ]
 [ 2.3037