Program for backpropogation

In [2]:
import numpy as np
#np.set_printoptions(precision=2)

sigmoid = lambda x : 1/(1+np.exp(-x))
derivatives_sigmoid = lambda x : x*(1-x)

def display_matrix(input,dc=2):#dc=decimal count
    r,c = input.shape
    for i in range(r):
        for j in range(c):
            input[i, j] = round(input[i, j], dc)
    return input


**Step 0:** Read input and output

In [3]:
#Step 0
x = np.array([[1, 0, 1, 0], [1, 0, 1, 1], [0, 1, 0, 1]])
y = np.array([[1], [1], [0]])

**Step 1:** Initialize weights and biases with random values (There are methods to initialize weights and biases but for now initialize with random values)

In [7]:
#Step 1
wh = np.random.rand(4,3)
bh = np.random.rand(1,3)
wout = np.random.rand(3,1)
bout = np.random.rand(1,1)

#For cross verification with demo
#wh = np.array([[.42,.88,.55],[.1,.73,.68],[.6,.18,.47],[.92,.11,.52]])
#bh = np.array([[.46,.72,.08]])
#wout = np.array([[.3],[.25],[.23]])
#bout = np.array([[.69]])

print("wh")
print(display_matrix(wh))
print()
print("bh")
print(display_matrix(bh))
print()
print("wout")
print(display_matrix(wout))
print()
print("bout")
print(display_matrix(bout))
print()

wh
[[0.62 0.21 0.07]
 [0.99 0.68 0.26]
 [0.82 0.42 0.85]
 [0.06 0.23 0.5 ]]

bh
[[0.92 0.78 0.44]]

wout
[[0.48]
 [0.01]
 [0.3 ]]

bout
[[0.74]]



**Step 2:** Calculate hidden layer input:
`hidden_layer_input = matrix_dot_product(X,wh) + bh`

In [8]:
#Step 2
hidden_layer_input = np.dot(x, wh) + bh
print("hidden_layer_input")
print(display_matrix(hidden_layer_input))
print()

hidden_layer_input
[[2.36 1.41 1.36]
 [2.42 1.64 1.86]
 [1.97 1.69 1.2 ]]



**Step 3:** Perform non-linear transformation on hidden linear input
`hiddenlayer_activations = sigmoid(hidden_layer_input)`

In [9]:
#Step 3
hidden_layer_activations = sigmoid(hidden_layer_input)
print("hidden_layer_activations")
print(display_matrix(hidden_layer_activations))
print()

hidden_layer_activations
[[0.91 0.8  0.8 ]
 [0.92 0.84 0.87]
 [0.88 0.84 0.77]]



**Step 4:** Perform linear and non-linear transformation of hidden layer activation at output layer
`output_layer_input = matrix_dot_product (hiddenlayer_activations * wout ) + bout` 
`output = sigmoid(output_layer_input)`

In [10]:
#Step 4
output_layer_input = np.dot(hidden_layer_activations,wout)+bout
output = sigmoid(output_layer_input)
print("output")
print(display_matrix(output))
print()

output
[[0.81]
 [0.81]
 [0.8 ]]



**Step 5:** Calculate gradient of Error(E) at output layer
`E = y-output`

In [11]:
#Step 5
error = y - output
print("E")
print(display_matrix(error))
print()

E
[[ 0.19]
 [ 0.19]
 [-0.8 ]]



**Step 6:** Compute slope at output and hidden layer
`Slope_output_layer= derivatives_sigmoid(output) Slope_hidden_layer = derivatives_sigmoid(hiddenlayer_activations)`

In [12]:
#Step 6
slope_output_layer = derivatives_sigmoid(output)
slope_hidden_layer = derivatives_sigmoid(hidden_layer_activations)

print("slope_output_layer")
print(display_matrix(slope_output_layer))
print()
print("slope_hidden_layer")
print(display_matrix(slope_hidden_layer))
print()

slope_output_layer
[[0.15]
 [0.15]
 [0.16]]

slope_hidden_layer
[[0.08 0.16 0.16]
 [0.07 0.13 0.11]
 [0.11 0.13 0.18]]



**Step 7:** Compute delta at output layer
`d_output = E * slope_output_layer*lr`

In [13]:
#Step 7
learning_rate = 1
delta_output = error * slope_output_layer * learning_rate
print("delta_output")
print(display_matrix(delta_output))
print()

delta_output
[[ 0.03]
 [ 0.03]
 [-0.13]]



*Step 8:** Calculate Error at hidden layer
`Error_at_hidden_layer = matrix_dot_product(d_output, wout.Transpose)`

In [14]:
#Step 8
error_at_hidden_layer = np.dot(delta_output, np.transpose(wout))
print("Error_at_hidden_layer")
print(display_matrix(error_at_hidden_layer,3))
print()

Error_at_hidden_layer
[[ 0.014  0.     0.009]
 [ 0.014  0.     0.009]
 [-0.062 -0.001 -0.039]]



**Step 9:** Compute delta at hidden layer
`d_hiddenlayer = Error_at_hidden_layer * slope_hidden_layer`

In [15]:
#Step 9
delta_hidden_layer = error_at_hidden_layer * slope_hidden_layer
print("delta_hidden_layer")
print(display_matrix(delta_hidden_layer,3))
print()

delta_hidden_layer
[[ 0.001  0.     0.001]
 [ 0.001  0.     0.001]
 [-0.007 -0.    -0.007]]



**Step 10:** Update weight at both output and hidden layer
`wout = wout + matrix_dot_product (hiddenlayer_activations.Transpose, d_output) * learning_rate`
`wh = wh+ matrix_dot_product (X.Transpose,d_hiddenlayer) * learning_rate`

In [16]:
#Step 10
learning_rate = 0.1
wout= wout + np.dot(np.transpose(hidden_layer_activations), delta_output) * learning_rate
print("wout")
print(display_matrix(wout))
print()

wh = wh + np.dot(np.transpose(x), delta_hidden_layer) * learning_rate
print("wh")
print(display_matrix(wh))
print()

wout
[[0.47]
 [0.  ]
 [0.3 ]]

wh
[[0.62 0.21 0.07]
 [0.99 0.68 0.26]
 [0.82 0.42 0.85]
 [0.06 0.23 0.5 ]]



**Step 11:** Update biases at both output and hidden layer
`bh = bh + sum(d_hiddenlayer, axis=0) * learning_rate`
`bout = bout + sum(d_output, axis=0)*learning_rate`

In [17]:
#Step 11
bh = bh + np.sum(delta_hidden_layer, axis=0) * learning_rate
bout= bout + np.sum(delta_output, axis=0) * learning_rate
print("bh")
print(display_matrix(bh))
print()
print("bout")
print(display_matrix(bout))

bh
[[0.92 0.78 0.44]]

bout
[[0.73]]
