In [0]:
import numpy as np
import random

'''
A Neuron has a set of weights and every Neuron has the same bias
It receives the inputs from the feed_forward function and calculates
output by squashing it.
'''
class Neuron:
  
  def __init__(self, bias):
    self.bias = bias
    self.weights = []
    
  def feed_forward(self, inputs):
    self.inputs = inputs
    return self.calculate_output()
  
  def calculate_output(self):
    self.output = np.dot(self.inputs, self.weights) + self.bias
    return self.squash()
  
  def calculate_error(self, target_output):
    return 0.5 * (target_output - self.output) ** 2
  
  def squash(self):
    return 1/(1 + np.exp(-self.output))
  
  ''' Partial derivative error wrt each weight is given by
      dE/dw.  Applying the chain rule twice results in the following
      dE/dw = dE/dy * dy/dz * dz/dw
      where E is the cost function
            y is the unit's output
            z is the unit's input
            w is the weight
  '''
  
  # Function for dE/dy
  def cost_pd_wrt_output(self, target_output):
    return -(target_output - self.output)
  
  # Function for dy/dz
  def out_pd_wrt_input(self):
    return self.output*(1-self.output)
  
  # Function for dz/dw
  def input_pd_wrt_weight(self, index):
    return self.inputs[index]

'''
A NeuronLayer has the number of neurons in that layer and a
Neurons list.
'''
class NeuronLayer:
  
  def __init__(self, num_neurons):
    self.num_neurons = num_neurons
    self.neurons = []
    for i in range(0, num_neurons):
      self.neurons.append(Neuron(np.random.randint(0,1)))
    
  def feed_forward(self, inputs):
    outputs = []
    for neuron in self.neurons:
      outputs.append(neuron.feed_forward(inputs))
    self.outputs = outputs
    return self.outputs
  
  def inspect(self):
    print('Neurons:', len(self.neurons))
    for n in range(len(self.neurons)):
        print(' Neuron', n)
        for w in range(len(self.neurons[n].weights)):
            print('  Weight:', self.neurons[n].weights[w])
            print('  Bias:', self.neurons[n].bias)

'''
A NeuralNet has a number of inputs and a hidden and output
layer
'''
class NeuralNet:
  
  LEARNING_RATE = 0.5
  
  def __init__(self, num_inputs, num_hidden, num_output):
    self.num_inputs = num_inputs
    
    self.hidden_layer = NeuronLayer(num_hidden)
    self.output_layer = NeuronLayer(num_output)
    
    self.init_weights_from_inputs_to_hidden_layer_neurons()
    self.init_weights_from_hidden_to_output_layer_neurons()
    
    
  def init_weights_from_inputs_to_hidden_layer_neurons(self):
    for neuron in self.hidden_layer.neurons:
      for i in range(self.num_inputs):
        neuron.weights.append(random.random())
   
  def init_weights_from_hidden_to_output_layer_neurons(self):
    for neuron in self.output_layer.neurons:
      for i in range(len(self.hidden_layer.neurons)):
        neuron.weights.append(random.random())
        
  def inspect(self):
    print('------')
    print('* Inputs: {}'.format(self.num_inputs))
    print('------')
    print('Hidden Layer')
    self.hidden_layer.inspect()
    print('------')
    print('* Output Layer')
    self.output_layer.inspect()
    print('------')
    
  def feed_forward(self, inputs):
    hidden_layer_outputs = self.hidden_layer.feed_forward(inputs)
    return self.output_layer.feed_forward(hidden_layer_outputs)
  
  def train(self, train_inputs, train_outputs):
    self.feed_forward(train_inputs)
    
    # First obtain dE/dz to avoid recalculating
    output_error_pd_wrt_input = [0] * len(self.output_layer.neurons)
    for i in range(len(self.output_layer.neurons)):
      output_error_pd_wrt_input[i] = self.output_layer.neurons[i].cost_pd_wrt_output(train_outputs[i]) * self.output_layer.neurons[i].out_pd_wrt_input()
    
    # Obtain hidden dE/dz with values calculated before
    hidden_error_pd_wrt_input = [0] * len(self.hidden_layer.neurons)
    for i in range(len(self.hidden_layer.neurons)):
      
      total_error_sum = 0
      for w_index in range(len(self.output_layer.neurons)):
        total_error_sum += output_error_pd_wrt_input[w_index] * self.output_layer.neurons[w_index].weights[i]
      
      hidden_error_pd_wrt_input[i] = total_error_sum * self.hidden_layer.neurons[i].out_pd_wrt_input()
      
    # Update output neuron weights
    for i in range(len(self.output_layer.neurons)):
      
      for w_index in range(len(self.output_layer.neurons[i].weights)):
        weight_delta = output_error_pd_wrt_input[i] * self.output_layer.neurons[i].input_pd_wrt_weight(w_index)
        
        self.output_layer.neurons[i].weights[w_index] -= self.LEARNING_RATE * weight_delta
        
    # Update hidden neuron weights
    for i in range(len(self.hidden_layer.neurons)):
      
      for w_index in range(len(self.hidden_layer.neurons[i].weights)):
        weight_delta = hidden_error_pd_wrt_input[i] * self.hidden_layer.neurons[i].input_pd_wrt_weight(w_index)
        
        self.hidden_layer.neurons[i].weights[w_index] = self.LEARNING_RATE * weight_delta
        
  # Calculate total error of network
  def calculate_total_error(self, training_sets):
    total_error = 0
    for t in range(len(training_sets)):
      self.feed_forward(training_sets[t][0])
      for i in range(len(training_sets[t][1])):
        total_error += self.output_layer.neurons[i].calculate_error(training_sets[t][1][i])

    return total_error

In [2]:
training_sets = [
    [[0,0], [0]],
    [[0,1], [1]],
    [[1,0], [1]],
    [[1,1], [0]]
]

nn = NeuralNet(len(training_sets[0][0]),2,len(training_sets[0][1]))
for i in range(1000):
  training_inputs, training_outputs = random.choice(training_sets)
  nn.train(training_inputs, training_outputs)
  print(i, nn.calculate_total_error(training_sets))

0 0.6575532756984387
1 0.6672400586175491
2 0.6775373205775999
3 0.641275292348509
4 0.6526614037064281
5 0.6176863374682529
6 0.6299257865075477
7 0.6417433553796353
8 0.6531109757083833
9 0.6640152603383536
10 0.6283660754571601
11 0.5953514400807061
12 0.6081999180670048
13 0.620727340446334
14 0.5884838992625453
15 0.5604116832339506
16 0.5735151005250364
17 0.5866587102927436
18 0.5996685228350456
19 0.6124191261286366
20 0.6248231992220229
21 0.6368227747479964
22 0.648382162640578
23 0.6594823360951576
24 0.6240844896932074
25 0.6361097210752991
26 0.6476963989829189
27 0.6588246251484908
28 0.6694870136832483
29 0.6335653821913703
30 0.6000694175891521
31 0.6128103891943317
32 0.5814495824061756
33 0.5945299629387144
34 0.6073958054027757
35 0.5766894879641802
36 0.5505618400011627
37 0.5634798790005782
38 0.5766114067406726
39 0.5897372016348172
40 0.6026957374436868
41 0.572592302127022
42 0.547200687249599
43 0.5272046329222317
44 0.5388011657315145
45 0.551251908250385
46 0