## Backpropagation Example Implementation

This example has been adapted form the below blog post:

https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/

For the mathematical explaination read my blog post and neural network fundamentals series here:

https://samzee.net/2019/02/20/neural-networks-learning-the-basics-backpropagation/

The standard process for building the neural network is as follows:
1. Initialize Network
2. Forward Propagate
3. Back Propagate Error
4. Train Network

### Reproduce example in blog post

In [1]:
import numpy as np
import random
import io
import pandas as pd
import plotly.express as px
import numpy

In [192]:
def initialze_network(n_input, n_hidden, n_output):
    network = []
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    output_layer = {'weights': [np.random.rand(n_hidden,n_output)]}
    network.append(output_layer)
    return network
#defines sigmoid function
def sigmoid(x):
    s = 1/(1 + np.exp(-x))
    return s
#derivative of the loss function delta rule
def dloss(target, output):
    error = -(target - output)
    return error
#forward pass
def forward_pass(weights, inputs):
    a = np.dot(weights, inputs)
    return a

In [193]:
#create the network
random.seed(1)
network = initialze_network(2, 2, 2)
weights = [neuron['weights'] for neuron in network]

In [194]:
#testing example
x = np.array([[0.01,0.2],[0.15,0.08]])
y = np.array([[0.01,0.5],[0.7,0.1]])
z = np.array([[0.24,0.55],[0.35,0.6]])

inputs = np.array([0.5, 0.1])
target = np.array([0.1, 0.99])

In [195]:
#update randomised inputs to fixed inputs
weights[0][0] = x
weights[1][0] = y
weights[2][0] = z

In [196]:
h1 = forward_pass(weights[0][0], inputs)
h2 = forward_pass(weights[1][0], h1)
#output
outputs = forward_pass(weights[2][0], h2)

In [197]:
outputs

array([0.02421  , 0.0300925])

In [198]:
#the backward pass outputs
def out_gradient(target, output, inputs):
    gradient = -(target - output)*inputs
    return gradient

In [199]:
h2

array([0.04175, 0.0258 ])

In [200]:
target

array([0.1 , 0.99])

In [201]:
grad = [out_gradient(target, outputs, item) for item in h2]
#output gradients
gradients = np.vstack(grad).T
gradients

array([[-0.00316423, -0.00195538],
       [-0.04007614, -0.02476561]])

In [202]:
def hid_gradient(target, output, inputs, weight, neurons):
    neuron = []
    for i in range(len(output)):
        s_o = -(target[i] - output[i])
        k = s_o*weight[:, neurons][i]
        neuron.append(k)
    out = np.sum(neuron)*inputs
    return out

In [203]:
n_hidden_two = 2
w = weights[2][0]
mygrads = []
for neuron in range(n_hidden_two):
    grad = np.array([hid_gradient(target, outputs, myinputs, w , neuron) for myinputs in h1])
    mygrads.append(grad)
mygrads = np.vstack(mygrads)
mygrads

array([[-0.00885393, -0.02939505],
       [-0.01544073, -0.05126321]])

In [204]:
def hid_two_layer(target, outputs, inputs, w1, neuron, w2):
    mygrads = []
    for i in range(len(w1)):
        grad = hid_gradient(target, outputs, w1[:,neuron][i], w2,i)
        mygrads.append(grad)
    xsum = np.sum(mygrads)*inputs
    return xsum

In [205]:
n_hidden_two = 2
h2_mygrads = []
w1 = weights[1][0]
w2 = weights[2][0]
for neuron in range(n_hidden_two):
    grad = np.array([hid_two_layer(target, outputs, myinputs, w1, neuron, w2) for myinputs in inputs])
    h2_mygrads.append(grad)
h2_mygrads = np.vstack(h2_mygrads)
h2_mygrads

array([[-0.21794094, -0.04358819],
       [-0.11942076, -0.02388415]])

In [206]:
#set the learning rate
#update weights
l = 0.5
weights[0][0] = weights[0][0] - l*h2_mygrads
weights[1][0] = weights[1][0] - l*mygrads
weights[2][0] = weights[2][0] - l*gradients

In [207]:
weights

[[array([[0.11897047, 0.22179409],
         [0.20971038, 0.09194208]])], [array([[0.01442697, 0.51469752],
         [0.70772036, 0.1256316 ]])], [array([[0.24158212, 0.55097769],
         [0.37003807, 0.61238281]])]]

In [208]:
weights[2][0]

array([[0.24158212, 0.55097769],
       [0.37003807, 0.61238281]])

## Put Everything Together

In [209]:
def initialze_network(n_input, n_hidden, n_output):
    network = []
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    hidden_layer = {'weights': [np.random.rand(n_input,n_hidden)]}
    network.append(hidden_layer)
    output_layer = {'weights': [np.random.rand(n_hidden,n_output)]}
    network.append(output_layer)
    return network
#forward pass
#defines sigmoid function
def sigmoid(x):
    s = 1/(1 + np.exp(-x))
    return s
#derivative of the loss function delta rule
def dloss(target, output):
    error = -(target - output)
    return error
#forward pass
def forward_pass(weights, inputs):
    a = np.dot(weights, inputs)
    return a
def out_gradient(target, output, inputs):
    gradient = -(target - output)*inputs
    return gradient

#onehidden layers
def hid_gradient(target, output, inputs, weight, neurons):
    neuron = []
    for i in range(weight.shape[1]):
        s_o = -(target - output)
        k = s_o*weight[:, neurons][i]
        neuron.append(k)
    out = np.sum(neuron)*inputs
    return out
def hid_two_layer(target, outputs, inputs, w1, neuron, w2):
    mygrads = []
    for i in range(len(w1)):
        grad = hid_gradient(target, outputs, w1[:,neuron][i], w2,i)
        mygrads.append(grad)
    xsum = np.sum(mygrads)*inputs
    return xsum

#loss function
def lossfunction(target,output):
    loss = (1/2)*(target - output)**2
    return loss

In [132]:
def train_network(network, inputs, target, l, n_epoch):
    weights = [neuron['weights'] for neuron in network]
    for epoch in range(n_epoch):
        myloss = []
        mygrads = []
        h2_mygrads = []
        for i in range(len(inputs)):
            h1 = forward_pass(weights[0][0][0], inputs[i])
            h2 = forward_pass(weights[1][0], h1)
            outputs = forward_pass(weights[2][0][0], h2)
            loss = lossfunction(target[i], outputs)
            t_loss = np.sum(loss)
            myloss.append(t_loss)
            grad = [out_gradient(target[i], outputs, item) for item in h2]
            #print("this is output gradient {} for iteration {}".format(grad, i))
            gradients = np.vstack(grad).T     #output gradients
            n_hidden_two = 2
            w = weights[2][0]
            #hidden layer two
            for neuron in range(n_hidden_two):
                grad = np.array([hid_gradient(target, outputs, myinputs,weights[2][0] , neuron) for myinputs in h1])
            mygrads.append(grad)
            mygrads = np.vstack(mygrads)
            #hidden layer one
            n_hidden_two = 2
            w1 = weights[1][0]
            w2 = weights[2][0]
            for neuron in range(n_hidden_two):
                grad = np.array([hid_two_layer(target, outputs, myinputs, w1, neuron, w2) for myinputs in inputs[i]])
            h2_mygrads.append(grad)
            h2_mygrads = np.vstack(h2_mygrads)
            weights[0][0] = weights[0][0] - l*h2_mygrads
            weights[1][0] = weights[1][0] - l*mygrads
            weights[2][0][0] = weights[2][0][0] - l*gradients
            final_loss = np.sum(myloss)
            #print('>epoch={}, error={}'.format(n_epoch, final_loss))
            return final_loss

In [133]:
#training samples
inputs = np.array([[2.7810836, 2.550537003], [1.465489372, 2.362125076], [396561688, 4.400293529], [1.38807019, 1.850220317],
                  [3.06407232, 3.005305973],[7.627531214, 2.759262235],[5.332441248, 2.088626775],[6.922596716, 1.77106367],
                  [8.675418651, 0.242068655], [7.673756466, 3.508563011]])
target = np.array([[0], [0], [0], [0], [0], [1], [1], [1], [1], [1]])

Dataset from:

https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/

An alternative implementation can also be accessed via this link

In [134]:
#initialize network
network = initialze_network(inputs.shape[1], 2, target.shape[1])
weights = [neuron['weights'] for neuron in network]

In [135]:
network

[{'weights': [array([[0.76235475, 0.61464184],
          [0.3652777 , 0.60058028]]), array([0.01487713, 0.829767  ])]},
 {'weights': [array([[0.4104964 , 0.64708764],
          [0.88038413, 0.86201736]]), array([0.27902185, 0.99723562])]},
 {'weights': [array([[0.70818411, 0.95309679],
          [0.42618508, 0.11536369]]), array([0.08711459, 0.11089362])]}]

In [136]:
weights[2][0].shape[1]

2

In [137]:
myloss = []
mygrads = []
h2_mygrads = []
h1 = forward_pass(weights[0][0], inputs[0])
h2 = forward_pass(weights[1][0], h1)
outputs = forward_pass(weights[2][0][0], h2)
loss = lossfunction(target[0], outputs)
t_loss = np.sum(loss)
myloss.append(t_loss)
grad = [out_gradient(target[0], outputs, item) for item in h2]
#print("this is output gradient {} for iteration {}".format(grad, i))
gradients = np.vstack(grad).T     #output gradients
n_hidden_two = 2
w = weights[2][0]
#hidden layer two
for neuron in range(n_hidden_two):
    grad = np.array([hid_gradient(target, outputs, myinputs, weights[2][0] , neuron) for myinputs in h1])
mygrads.append(grad)
mygrads = np.vstack(mygrads)
#hidden layer one
n_hidden_two = 2
w1 = weights[1][0]
w2 = weights[2][0]
for neuron in range(n_hidden_two):
    grad = np.array([hid_two_layer(target, outputs, myinputs, w1, neuron, w2) for myinputs in inputs[0]])
h2_mygrads.append(grad)
h2_mygrads = np.vstack(h2_mygrads)

In [138]:
mygrads

array([[272.95051203, 188.56241765]])

In [139]:
l = 0.5

In [140]:
weights[0][0] = weights[0][0] - l*h2_mygrads
weights[1][0] = weights[1][0] - l*mygrads
weights[2][0][0] = weights[2][0][0] - l*gradients

In [141]:
weights[0][0] 

array([[-158.66150571, -145.59328059],
       [-159.05858275, -145.60734215]])

In [142]:
gradients

array([[23.48764147, 40.42477357]])

In [143]:
weights[2][0][0]

array([-11.03563662, -19.25928999])

In [144]:
weights[2][0]

array([[-11.03563662, -19.25928999],
       [  0.42618508,   0.11536369]])

In [145]:
n_epoch = [x for x in range(1, 100)]
loss = []
for n in n_epoch:
    myloss = train_network(network, inputs, target, 0.5, n)
    loss.append(myloss)

TypeError: 'numpy.float64' object is not iterable

Our neural network works as we see the decrease in the loss function with each epoch

In [86]:
network

[{'weights': [array([[0.94246795, 0.05119498],
          [0.01348304, 0.04932447]])]},
 {'weights': [array([[0.11698036, 0.06007718],
          [0.49238838, 0.31364219]])]},
 {'weights': [array([[0.10377062],
          [0.31765897]])]}]

In [99]:
weights[2][0].shape

(2, 1)

In [114]:
(weights[2][0].T).shape

(1, 2)

In [110]:
x = np.array([[0.01,0.2],[0.15,0.08]])
y = np.array([[0.01,0.5],[0.7,0.1]])
z = np.array([[0.24,0.55]])


In [111]:
z.shape

(1, 2)

In [112]:
z

array([[0.24, 0.55]])