See http://www.anotsorandomwalk.com/backpropagation-example-with-numbers-step-by-step/

In [1]:
import numpy as np
import pandas as pd
from typing import Tuple
%matplotlib inline

In [2]:
# weights and biases to learn with their initial values
w1 = 0.82
w2 = 0.53
w3 = 0.44
w4 = 0.15
w5 = 0.61
w6 = 0.39
w7 = 0.11
w8 = 0.41
w9 = 0.50
w10 = 0.45
w11 = 0.23
w12 = 0.68
w13 = 0.49
w14 = 0.71
w15 = 0.23
w16 = 0.21
w17 = 0.44
w18 = 0.62
b1 = 0.5
b2 = 0.5

In [3]:
def sigmoid(z):
    return np.divide(1, (1 + np.exp(-z)))

In [4]:
# input and target values of the sample the network is trained on:
x1 = 2
x2 = 5
x3 = 1
x4 = 4
t1 = 0.05
t2 = 0.1

In [5]:
# the error function (= mean squared error MSE)
def error(olist, tlist):
    return 0.5 * (np.square(olist[0] - tlist[0]) + np.square(olist[1] - tlist[1]))

In [6]:
# implement our simple MLP neural network with one hidden layer
def forward_propagation(x1, x2, x3, x4) -> Tuple[float, float, float, float, float]:
    zh1 = w1 * x1 + w4 * x2 + w7 * x3 + w10 * x4 + b1
    zh2 = w2 * x1 + w5 * x2 + w8 * x3 + w11 * x4 + b1
    zh3 = w3 * x1 + w6 * x2 + w9 * x3 + w12 * x4 + b1
    
    h1 = sigmoid(zh1)
    h2 = sigmoid(zh2)
    h3 = sigmoid(zh3)
    
    zo1 = w13 * h1 + w15 * h2 + w17 * h3 + b2
    zo2 = w14 * h1 + w16 * h2 + w18 * h3 + b2
    
    o1 = sigmoid(zo1)
    o2 = sigmoid(zo2)
    
    return h1, h2, h3, o1, o2

In [7]:
# FORWARD PROPAGATION
# make one iteratoin through the network and show the total error.
h1, h2, h3, o1, o2 = forward_propagation(x1, x2, x3, x4)
e_init = error([o1, o2], [t1, t2])
print("Results from 1 forward propagation:")
print("Values of hidden neurons h1-h3:")
print(f"\th1: {h1}\th2: {h2}\th3: {h3}")
print("Predictions (output nodes o1-o2):")
print(f"\to1: {o1}\to2: {o2}")
print("Ground truth (target values t1-t2):")
print(f"\tt1: {t1}\tt2: {t2}")
print(f"Total error: {e_init}")

Results from 1 forward propagation:
Values of hidden neurons h1-h3:
	h1: 0.9918374288468401	h2: 0.9973748797433398	h3: 0.9985719267115775
Predictions (output nodes o1-o2):
	o1: 0.8395344467680094	o2: 0.8841947961679333
Ground truth (target values t1-t2):
	t1: 0.05	t2: 0.1
Total error: 0.6191630604850664


In [8]:
# BACKPROPAGATION
learning_rates = [0.01, 0.05, 1.0]
iterations = [1, 1000, 10000, 100000]

In [10]:
def backpropagation(iterations:int, alpha:float):
    global w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15, w16, w17, w18, b1, b2
    # 1. perform forward operation
    # NB: this is equivalent to stochastic gradient descent as we do 
    # the backprop for each sample (here only one sample though), our sample is:
    # input: x1, x2, x3, x4; target: t1, t2
    for iter in range(iterations):
        # do forward propagation
        h1, h2, h3, o1, o2 = forward_propagation(x1, x2, x3, x4)
        
        # calculate error
        e = error([o1, o2], [t1, t2])
        
        # perform backpropagation:
        # 1) calculate all gradients
        dEdw18 = (o2 - t2) * o2 * (1 - o2) * h3
        dEdw17 = (o1 - t1) * o1 * (1 - o1) * h3
        
        dEdw16 = (o2 - t2) * o2 * (1 - o2) * h2
        dEdw15 = (o1 - t1) * o1 * (1 - o1) * h2
        
        dEdw14 = (o2 - t2) * o2 * (1 - o2) * h1
        dEdw13 = (o1 - t1) * o1 * (1 - o1) * h1
        
        dEdb2 = (o1 - t1) * o1 * (1 - o1) + (o2 - t2) * o2 * (1 - o2)
        
        dEdh1 = (o1 - t1) * o1 * (1 - o1) * w13 + (o2 - t2) * o2 * (1 - o2) * w14
        dEdh2 = (o1 - t1) * o1 * (1 - o1) * w15 + (o2 - t2) * o2 * (1 - o2) * w16
        dEdh3 = (o1 - t1) * o1 * (1 - o1) * w17 + (o2 - t2) * o2 * (1 - o2) * w18
        
        dEdw12 = dEdh3 * h3 * (1 - h3) * x4
        dEdw11 = dEdh2 * h2 * (1 - h2) * x4
        dEdw10 = dEdh1 * h1 * (1 - h1) * x4
        
        dEdw9 = dEdh3 * h3 * (1 - h3) * x3
        dEdw8 = dEdh2 * h2 * (1 - h2) * x3
        dEdw7 = dEdh1 * h1 * (1 - h1) * x3
        
        dEdw6 = dEdh3 * h3 * (1 - h3) * x2
        dEdw5 = dEdh2 * h2 * (1 - h2) * x2
        dEdw4 = dEdh1 * h1 * (1 - h1) * x2
        
        dEdw3 = dEdh3 * h3 * (1 - h3) * x1
        dEdw2 = dEdh2 * h2 * (1 - h2) * x1
        dEdw1 = dEdh1 * h1 * (1 - h1) * x1

        dEdb1 = dEdh1 * h1 * (1 - h1) + dEdh2 * h2  * (1 - h2) + dEdh3 * h3 * (1 - h3)
        
        
        # 2) update all weights and biases
        # update weights w18 - w13 and b2
        w18 = w18 - alpha * dEdw18
        w17 = w17 - alpha * dEdw17
        w16 = w16 - alpha * dEdw16
        w15 = w15 - alpha * dEdw15
        w14 = w14 - alpha * dEdw14
        w13 = w13 - alpha * dEdw13
        b2 = b2 - alpha * dEdb1
        
        # update weights w12 - w1 and b1
        w12 = w12 - alpha * dEdw12
        w11 = w11 - alpha * dEdw11
        w10 = w10 - alpha * dEdw10
        w9 = w9 - alpha * dEdw9
        w8 = w8 - alpha * dEdw8
        w7 = w7 - alpha * dEdw7
        w6 = w6 - alpha * dEdw6
        w5 = w5 - alpha * dEdw5
        w4 = w4 - alpha * dEdw4
        w3 = w3 - alpha * dEdw3
        w2 = w2 - alpha * dEdw2
        w1 = w1 - alpha * dEdw1


In [11]:
# perform one backpropagation through the network for a learning rate of 0.1
backpropagation(iterations=1, alpha=0.1)
h1, h2, h3, o1, o2 = forward_propagation(x1, x2, x3, x4)
e = error([o1, o2], [t1, t2])
print("Results from 1 forward propagation after 1 backpropagation:")
print("Values of hidden neurons h1-h3:")
print(f"\th1: {h1}\th2: {h2}\th3: {h3}")
print("Predictions (output nodes o1-o2):")
print(f"\to1: {o1}\to2: {o2}")
print("Ground truth (target values t1-t2):")
print(f"\tt1: {t1}\tt2: {t2}")
print(f"Total error: {round(e, 5)} (vs. {round(e_init, 5)} w/o bp)")

Results from 1 forward propagation after 1 backpropagation:
Values of hidden neurons h1-h3:
	h1: 0.9918044602061147	h2: 0.9973735762666279	h3: 0.9985710229371896
Predictions (output nodes o1-o2):
	o1: 0.8352069207845401	o2: 0.8817114034280252
Ground truth (target values t1-t2):
	t1: 0.05	t2: 0.1
Total error: 0.61381 (vs. 0.61916 w/o bp)
Network weights and biases:
	w1  = {w1}
	w2  = {w2}
	w3  = {w3
	w4  = {w4}
	w5  = {w5}
	w6  = {w6}
	w7  = {w7}
	w8  = {w8}
	w9  = {w9}
	w10 = {w10}
	w11 = {w11}
	w12 = {w12}
	w13 = {w13}
	w14 = {w14}
	w15 = {w15}
	w16 = {w16}
	w17 = {w17}
	w18 = {w18}
	b1  = {b1}
	b2  = {b2}


In [16]:
def print_weights():
    print(f"\tw1  = {w1}")
    print(f"\tw2  = {w2}")
    print(f"\tw3  = {w3}")
    print(f"\tw4  = {w4}")
    print(f"\tw5  = {w5}")
    print(f"\tw6  = {w6}")
    print(f"\tw7  = {w7}")
    print(f"\tw8  = {w8}")
    print(f"\tw9  = {w9}")
    print(f"\tw10 = {w10}")
    print(f"\tw11 = {w11}")
    print(f"\tw12 = {w12}")
    print(f"\tw13 = {w13}")
    print(f"\tw14 = {w14}")
    print(f"\tw15 = {w15}")
    print(f"\tw16 = {w16}")
    print(f"\tw17 = {w17}")
    print(f"\tw18 = {w18}")
    print(f"\tb1  = {b1}")
    print(f"\tb2  = {b2}")

In [17]:
print("Network weights and biases after 1 backpropagation:")
print_weights()

Network weights and biases after 1 backpropagation:
	w1  = 0.8198232996701793
	w2  = 0.5299783598485821
	w3  = 0.439972453580711
	w4  = 0.14955824917544838
	w5  = 0.6099458996214552
	w6  = 0.38993113395177753
	w7  = 0.10991164983508968
	w8  = 0.409989179924291
	w9  = 0.4999862267903555
	w10 = 0.4496465993403587
	w11 = 0.22995671969716414
	w12 = 0.6799449071614221
	w13 = 0.4794504990904563
	w14 = 0.7020358307812165
	w15 = 0.21939160098722876
	w16 = 0.20199136664354855
	w17 = 0.4293788688118612
	w18 = 0.6119817547007638
	b1  = 0.5
	b2  = 0.4998870565497362
