# Simple Python Neural Network Example

This example comes from [Andrew Trask's](http://iamtrask.github.io/2015/07/12/basic-python-network/) blog.

We start with the original source, and then refactor it to better reflect the network structure and modularity
of the layers.

First we set up a sample problem: training a network to learn the following boolean function of 3 inputs (the output is the XOR of the first two inputs; the third input is always 1):

In [146]:
import numpy as np

# Training set: each row of X is one sample of three input features, and the
# same row of y holds the desired output for that sample.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])
# Targets written out directly as a (4 x 1) column vector.
y = np.array([[0], [1], [1], [0]])

print("Input:\n%s" % X)
print("\nOutput:\n%s" % y)

Input:
[[0 0 1]
 [0 1 1]
 [1 0 1]
 [1 1 1]]

Output:
[[0]
 [1]
 [1]
 [0]]


## Here is the initial code as presented: *a 9 line Python neural network*.

In [147]:
# Two weight matrices with entries drawn uniformly from [-1, 1):
# syn0 maps the 3 input features to 4 hidden nodes, syn1 maps those 4 hidden
# nodes to the single output.
syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1
for j in range(10000):
    # Forward pass: sigmoid of the weighted sums, first hidden layer then output.
    l1 = 1/(1+np.exp(-(np.dot(X,syn0))))
    l2 = 1/(1+np.exp(-(np.dot(l1,syn1))))
    # Output error (y - l2) scaled by the sigmoid slope at the output, l2*(1-l2).
    l2_delta = (y - l2)*(l2*(1-l2))
    # Push the output delta back through syn1 and scale by the hidden-layer slope.
    # This must use syn1 before it is updated below.
    l1_delta = l2_delta.dot(syn1.T) * (l1 * (1-l1))
    # Weight updates: each layer's input (transposed) times that layer's delta.
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)

# l1 and l2 hold the activations from the final iteration's forward pass.
print("Output After Training:")
print(l1)
print(l2)

Output After Training:
[[  2.57918557e-01   9.21504930e-01   9.97693994e-01   9.18618144e-01]
 [  9.55758425e-01   1.67356634e-02   9.53851147e-01   9.99403861e-01]
 [  9.01425533e-01   5.58647232e-02   5.96264330e-01   2.95224215e-01]
 [  9.98243757e-01   8.57795042e-05   6.59045947e-02   9.84180673e-01]]
[[ 0.00972044]
 [ 0.98582196]
 [ 0.99442382]
 [ 0.01366351]]


## Parameterize the number of nodes in the hidden layer:

In [148]:
# Layer sizes: input features, hidden nodes, output nodes.
inputs, hidden, outputs = 3, 6, 1

# Initial weights drawn uniformly from [-1, 1), shaped by the sizes above.
syn0 = np.random.random((inputs, hidden)) * 2 - 1
syn1 = np.random.random((hidden, outputs)) * 2 - 1

for step in range(10000):
    # Forward pass through the hidden layer, then the output layer.
    l1 = 1 / (1 + np.exp(-X.dot(syn0)))
    l2 = 1 / (1 + np.exp(-l1.dot(syn1)))

    # Backward pass: scale each layer's error by the sigmoid slope there.
    l2_slope = l2 * (1 - l2)
    l2_delta = (y - l2) * l2_slope
    l1_slope = l1 * (1 - l1)
    l1_delta = l2_delta.dot(syn1.T) * l1_slope

    # Update the weights only after both deltas have been computed.
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)

print("Output After Training:")
for activations in (l1, l2):
    print(activations)

Output After Training:
[[  3.29861707e-01   4.87333065e-01   9.28959058e-01   6.24234836e-01
    4.75311182e-02   9.47274097e-01]
 [  6.84175162e-01   8.58767574e-02   6.96815740e-02   6.42400971e-02
    9.38153342e-05   8.21921132e-01]
 [  6.21704258e-02   1.01912036e-01   9.99760359e-01   9.17550475e-02
    9.42373607e-01   7.49117964e-01]
 [  2.25857446e-01   1.10903066e-02   9.59833695e-01   4.15745844e-03
    2.98289076e-02   4.34098123e-01]]
[[ 0.00868254]
 [ 0.98998334]
 [ 0.99058551]
 [ 0.0091875 ]]


## Factoring the sigmoid function and its derivative:

In [149]:
def sigmoid(x):
    """
    Return the logistic sigmoid, 1 / (1 + e^-x), of x element-wise.

    x may be a scalar, a NumPy array, or any array-like such as a
    (nested) list; the result has the same shape as x.
    """
    # Plain Python sequences do not support unary minus, so convert first.
    # asanyarray passes existing arrays (and ndarray subclasses) through as-is.
    x = np.asanyarray(x)
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(s):
    """
    Return s * (1 - s), the slope of the sigmoid expressed via its output.

    Note that s is the value already produced by sigmoid(), not the
    pre-activation input that was passed to it.
    """
    complement = 1 - s
    return s * complement

# Fresh uniform weights in [-1, 1) for the input->hidden and hidden->output maps.
syn0 = np.random.random((inputs, hidden)) * 2 - 1
syn1 = np.random.random((hidden, outputs)) * 2 - 1

for j in range(10000):
    # Forward pass.
    l1 = sigmoid(X.dot(syn0))
    l2 = sigmoid(l1.dot(syn1))

    l2_error = y - l2
    # Display errors every 1000 trials
    if not j % 1000:
        print(l2_error)

    # Backward pass: both deltas are computed before either weight matrix changes.
    l2_delta = l2_error * sigmoid_prime(l2)
    l1_delta = l2_delta.dot(syn1.T) * sigmoid_prime(l1)
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)

print("Output After Training:")
for result in (syn0, syn1, l1, l2):
    print(result)

[[-0.34394513]
 [ 0.63121779]
 [ 0.67397988]
 [-0.35125098]]
[[-0.03724754]
 [ 0.04589352]
 [ 0.05385622]
 [-0.05586488]]
[[-0.02054541]
 [ 0.02542788]
 [ 0.029888  ]
 [-0.03060701]]
[[-0.01529301]
 [ 0.01910957]
 [ 0.02244183]
 [-0.02290625]]
[[-0.01256785]
 [ 0.01583408]
 [ 0.01857342]
 [-0.01893238]]
[[-0.01085267]
 [ 0.01376791]
 [ 0.01613093]
 [-0.01643228]]
[[-0.00965419]
 [ 0.01231977]
 [ 0.01441831]
 [-0.01468316]]
[[-0.00875973]
 [ 0.01123549]
 [ 0.01313583]
 [-0.01337529]]
[[-0.00806117]
 [ 0.01038596]
 [ 0.01213104]
 [-0.01235171]]
[[-0.00749719]
 [ 0.00969798]
 [ 0.01131743]
 [-0.01152352]]
Output After Training:
[[ 6.58089673 -1.67118631  3.55879544  0.31293784  4.48448712  3.46793134]
 [ 6.30458744  4.69220161 -1.47520456  0.34485138 -2.65903204  3.95088554]
 [-2.37198712 -0.21481039  0.35361032  0.5496883   1.08767116 -5.65736125]]
[[ 11.63749313]
 [ -4.84379727]
 [ -2.3102205 ]
 [  0.59174479]
 [ -3.70706066]
 [ -7.49750063]]
[[ 0.08533451  0.4464974   0.5874911   0.634

## Factor each Layer into a class to be more DRY:

In [183]:
class Layer(object):
    """A fully connected n => m layer with an optional sigmoid activation."""

    def __init__(self, n, m, sigmoid=True):
        """
        Define an n => m node weighted layer.

        We accept n input features, and compute m output nodes.

        Applies a sigmoid function to the output by default; pass
        sigmoid=False to emit the raw weighted sums instead.
        """
        # (n x m) weights
        # The ith column represents the weights of the inputs
        # to derive the ith output [1..m] via weighted sum.
        # Drawn from a normal distribution with mean 0 and std dev m ** -0.5.
        self.weights = np.random.normal(0, m ** -0.5, (n, m))
        # Boolean flag only; forward()/backward() call the module-level
        # sigmoid() and sigmoid_prime() functions when it is set.
        self.sigmoid = sigmoid

    def forward(self, inputs):
        """
        Takes some number, k, of input samples (rows) of n inputs each.

        Each output row is the list of m output node values for the
        corresponding input row - a (k x m) matrix.

        The inputs and outputs are remembered on the instance because
        backward() needs them.
        """
        self.inputs = inputs
        # (k x n) . (n x m) => (k x m)
        self.outputs = np.dot(inputs, self.weights)
        if self.sigmoid:
            self.outputs = sigmoid(self.outputs)
        return self.outputs

    def backward(self, output_errors, learning_rate=1.0):
        """
        Propagate error terms from outputs back to inputs.

        output_errors is a list of k error outputs (k x m) matrix.

        learning_rate scales the weight update.  It defaults to 1.0 (the
        unscaled update), so callers may omit it.

        Returns the error term for the previous layer - a
        (k x n) matrix.

        As a side-effect, the weights of this layer are updated.
        Must be called after forward(), which records the inputs and
        outputs used here.
        """
        if self.sigmoid:
            output_errors = output_errors * sigmoid_prime(self.outputs)
        # Compute the upstream error with the weights as they were during the
        # forward pass, i.e. before they are modified below.
        input_errors = output_errors.dot(self.weights.T)
        self.weights += learning_rate * np.dot(self.inputs.T, output_errors)
        return input_errors

## Use the `Layer` class instances to organize a training session.

In [151]:
syn0 = Layer(inputs, hidden)
syn1 = Layer(hidden, outputs)

# A rate of 1.0 applies the raw update, matching the unscaled weight
# updates used by the earlier versions of this loop.
learning_rate = 1.0

for j in range(10000):
    l1 = syn0.forward(X)
    l2 = syn1.forward(l1)
    l2_error = y - l2
    # Display errors every 1000 trials
    if j % 1000 == 0:
        print(l2_error)
    # Each backward() call updates that layer's weights and returns the error
    # term to feed to the layer before it.
    l1_error = syn1.backward(l2_error, learning_rate)
    syn0.backward(l1_error, learning_rate)

print("Output After Training:")
print(syn0.weights)
print(syn1.weights)
print(l1)
print(l2)

[[-0.46804683]
 [ 0.54988525]
 [ 0.54659366]
 [-0.43679974]]
[[-0.04281892]
 [ 0.04455356]
 [ 0.04293979]
 [-0.04800706]]
[[-0.02356408]
 [ 0.02463439]
 [ 0.02374855]
 [-0.02636336]]
[[-0.01763946]
 [ 0.01848655]
 [ 0.0178312 ]
 [-0.01972262]]
[[-0.01456724]
 [ 0.01529431]
 [ 0.01475734]
 [-0.016283  ]]
[[-0.01262948]
 [ 0.01327905]
 [ 0.01281578]
 [-0.01411499]]
[[-0.01127188]
 [ 0.01186615]
 [ 0.01145389]
 [-0.0125968 ]]
[[-0.01025597]
 [ 0.01080824]
 [ 0.01043373]
 [-0.01146112]]
[[-0.00946057]
 [ 0.00997951]
 [ 0.00963427]
 [-0.01057218]]
[[-0.00881689]
 [ 0.00930853]
 [ 0.00898679]
 [-0.00985296]]
Output After Training:
[[ 2.21817204  5.78599737 -3.49642226  2.41612497  2.13018973  4.23393599]
 [ 2.06550783  5.85352782 -3.58215081 -3.0271495  -1.48930045  4.14233137]
 [-3.34340136 -2.35605255  1.04267721 -1.55560375  1.14076067 -6.39847985]]
[[-4.23764058]
 [ 8.30156058]
 [-4.75495957]
 [ 2.81350375]
 [-3.06121008]
 [-8.63364816]]
[[ 0.03411284  0.08658659  0.73936349  0.17427923 

## Define a Network class to train a multi-layer network.

In [174]:
class NeuralNetwork(object):
    """A feed-forward network built from a chain of Layer instances."""

    def __init__(self, *nodes, learning_rate=0.5):
        """
        Create a multi-layer network.

        Arguments are the number of nodes in each layer, from input
        through output.  One Layer is created for each consecutive pair
        of sizes, so k sizes produce k - 1 layers.
        """
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(nodes, nodes[1:])]
        self.learning_rate = learning_rate

    def train(self, inputs, outputs, iterations=10000, learning_rate=None,
              print_every=1000):
        """
        Run train_once() `iterations` times on the given samples.

        If learning_rate is given it replaces the network's stored
        learning rate (and stays in effect for later calls).

        The output errors are printed on every `print_every`-th
        iteration, starting with the first; pass 0 or None to disable
        printing.
        """
        if learning_rate is not None:
            self.learning_rate = learning_rate
        for i in range(iterations):
            errors = self.train_once(inputs, outputs)
            if print_every and i % print_every == 0:
                print(errors)

    def train_once(self, inputs, outputs):
        """
        Perform one forward pass and one backward pass.

        Returns the output errors (outputs minus the network's
        predictions) measured before the weights were updated.
        """
        # Run forward
        predictions = self.run(inputs)

        # Back-propagate errors, last layer first; each layer updates its own
        # weights and hands back the error term for the layer before it.
        output_errors = outputs - predictions

        errors = output_errors
        for layer in reversed(self.layers):
            errors = layer.backward(errors, learning_rate=self.learning_rate)

        return output_errors

    def run(self, inputs):
        """Feed inputs through every layer in order and return the final output."""
        activations = inputs
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def __str__(self):
        """Return each layer's weight matrix under a "Layer i:" heading."""
        return "\n\n".join(
            "Layer %d:\n%s" % (i, layer.weights)
            for i, layer in enumerate(self.layers)
        )

In [182]:
# Build a network from the layer sizes defined earlier and train it on the
# sample problem with a smaller learning rate than the default.
n = NeuralNetwork(inputs, hidden, outputs)
n.train(X, y, learning_rate=0.1)

# Show the trained network's outputs for the training inputs, then its weights.
predictions = n.run(X)
print(predictions)
print(n)

[[-0.18578175]
 [ 0.82067767]
 [ 0.81755938]
 [-0.17793474]]
[[-0.50025766]
 [ 0.50548572]
 [ 0.49391415]
 [-0.49742329]]
[[-0.47780446]
 [ 0.50251536]
 [ 0.4801639 ]
 [-0.49519774]]
[[-0.29817122]
 [ 0.38536646]
 [ 0.35736329]
 [-0.42419254]]
[[-0.13784236]
 [ 0.17356422]
 [ 0.16469592]
 [-0.19224731]]
[[-0.08401498]
 [ 0.10276179]
 [ 0.09793975]
 [-0.11239353]]
[[-0.06260451]
 [ 0.07560252]
 [ 0.0721675 ]
 [-0.08219357]]
[[-0.05117485]
 [ 0.06134181]
 [ 0.05860381]
 [-0.06645194]]
[[-0.04397995]
 [ 0.05245183]
 [ 0.05013765]
 [-0.05668246]]
[[-0.03897969]
 [ 0.04631438]
 [ 0.04428796]
 [-0.04995853]]
[[ 0.03527101]
 [ 0.95821543]
 [ 0.96003205]
 [ 0.04500722]]
Layer 0:
[[ 4.1547118   2.53523042 -0.37744838 -2.79234374 -0.0184172  -4.97231585]
 [-3.04860579 -3.80911928  1.18785961 -2.59016786 -1.03111899 -5.0762254 ]
 [ 1.5004911  -1.26494153  0.46167019  4.13969323  0.12702238  1.76406609]]

Layer 1:
[[-4.62067983]
 [ 4.59243842]
 [-0.76097149]
 [ 5.47028807]
 [ 0.99607044]
 [-7.0151