Source: https://github.com/ibesora/udacity-deeplearning-notes/tree/master/docs

In [1]:
import numpy as np

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of x.

    Works on scalars and, element-wise, on NumPy arrays.
    """
    exp_neg_x = np.exp(-x)
    return 1 / (1 + exp_neg_x)

# One training example (three input features), its scalar target value,
# and the learning rate used to scale the weight updates.
x = np.array([0.5, 0.1, -0.2])
target = 0.6
learnrate = 0.5

# The weights are hard-coded so the numbers below are reproducible;
# in reality, we would start with random weights.
# Input -> hidden weights: one row per input, one column per hidden unit.
weights_input_hidden = np.array([
    [0.5, -0.6],
    [0.1, -0.2],
    [0.1, 0.7],
])

# Hidden -> output weights: one weight per hidden unit.
weights_hidden_output = np.array([0.1, -0.3])

#How many inputs do we have? How many hidden layers? How many hidden units are in the
#hidden layer? How many outputs do you see?

In [2]:
# x is a 1-D array with one entry per input feature
x.shape

(3,)

In [3]:
# rows correspond to inputs, columns to hidden units
weights_input_hidden.shape

(3, 2)

In [2]:
# Let us see what is going into the hidden layer - what do you think its size is?
# x has shape (3,) and weights_input_hidden has shape (3, 2), so the dot
# product has shape (2,): one weighted sum per hidden unit.
hidden_layer_input = x.dot(weights_input_hidden)
hidden_layer_input

array([ 0.24, -0.46])

In [3]:
# Apply the sigmoid activation element-wise to the hidden layer's input
# to get the output of the hidden layer
hidden_layer_output = sigmoid(hidden_layer_input)
hidden_layer_output

array([0.55971365, 0.38698582])

In [4]:
# The input to the output unit is the dot product of the hidden layer's output
# with the hidden-to-output weights; both are 1-D, so the result is a scalar.
output_layer_in = hidden_layer_output.dot(weights_hidden_output)
output_layer_in

-0.06012438223148006

In [5]:
# Apply the sigmoid activation to the output unit's input to get the network output
output = sigmoid(output_layer_in)

In [6]:
# Prediction error: difference between the target and the network output
error = target - output
error

0.11502656915007464

In [7]:
# Calculate error term for output layer: the prediction error scaled by the
# derivative of the sigmoid at the output, which is output * (1 - output)
output_error_term = error * output * (1 - output)
output_error_term  # this is what gets propagated back through the hidden-to-output weights

0.028730669543515018

In [8]:
# Calculate error term for hidden layer: send the output error term back
# through the hidden-to-output weights, then scale each entry by the sigmoid
# derivative of the corresponding hidden unit.
# output_error_term is a scalar, so an element-wise product is used here;
# NumPy recommends `*` over np.dot when one operand is 0-D.
hidden_error_term = output_error_term * weights_hidden_output * \
                    hidden_layer_output * (1 - hidden_layer_output)
hidden_error_term

# This appears complex, but is actually easier to understand when we do it in
# two steps, as shown below.

array([ 0.00070802, -0.00204471])

In [9]:
# What is the error at the output of the hidden layer? We propagate the error
# gradient back through the hidden-to-output weights: each hidden unit receives
# the output error term scaled by the weight that connects it to the output.
# output_error_term is a scalar, so an element-wise product is used here;
# NumPy recommends `*` over np.dot when one operand is 0-D.
hidden_layer_output_error = output_error_term * weights_hidden_output
hidden_layer_output_error

array([ 0.00287307, -0.0086192 ])

In [10]:
# Now the hidden error term is hidden_layer_output_error multiplied by the
# derivative of the activation function used in the hidden layer; for the
# sigmoid that derivative is hidden_layer_output * (1 - hidden_layer_output).
# This reproduces the value computed in one step in the previous cell.
hidden_error_term = hidden_layer_output_error * hidden_layer_output * (1 - hidden_layer_output)
hidden_error_term

array([ 0.00070802, -0.00204471])

In [11]:
# How much should we change the hidden-to-output weights by?
# learning rate * output error term * the value each weight was multiplied by
# in the forward pass (the hidden layer output)
weight_delta_h_o = learnrate * output_error_term * hidden_layer_output
weight_delta_h_o

array([0.00804047, 0.00555918])

In [13]:
# Do the same thing for the weights from input to hidden.
# x[:, np.newaxis] turns x from shape (3,) into a (3, 1) column, so broadcasting
# against the 2-element hidden_error_term yields a (3, 2) array: one delta per
# input-to-hidden weight.
weight_delta_i_h = learnrate * hidden_error_term * x[:, np.newaxis]
weight_delta_i_h

array([[ 1.77005547e-04, -5.11178506e-04],
       [ 3.54011093e-05, -1.02235701e-04],
       [-7.08022187e-05,  2.04471402e-04]])

You would then add these weight deltas to the current weights to update them. Let us look at an example.