In [1]:
import numpy as np

def sigmoid(x):
    """Sigmoid activation function: 1 / (1 + e^(-x)).

    `x` is passed straight to `np.exp`, so a plain number or a NumPy array
    both work; arrays are handled element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_prime(x):
    """Derivative of the sigmoid function, evaluated at x.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    Accepts whatever `sigmoid` accepts and returns a value of the same kind.
    """
    # Evaluate the activation once and reuse it for both factors instead of
    # calling sigmoid(x) twice.
    activation = sigmoid(x)
    return activation * (1 - activation)

In [2]:
# Input data: a single example with two feature values
x = np.array([0.1, 0.3])
# Target value that the network output (y-hat) is compared against
y = 0.2
# Input-to-output weights, one per entry of x
weights = np.array([-0.8, 0.5])

In [3]:
# The learning rate (eta in the weight step equation); it scales the
# gradient descent step computed for each weight further below
learnrate = 0.5

In [4]:
# Linear combination performed by the node: the dot product of the inputs
# and the weights. This is the h in f(h) and f'(h).
h = np.dot(x, weights)
h

0.06999999999999998

In [5]:
# The neural network output (y-hat): the sigmoid activation applied to h
nn_output = sigmoid(h)
nn_output

0.5174928576663897

In [6]:
# Output error (y - y-hat): target minus prediction, so a negative value
# means the network's output is above the target
error = y - nn_output
error

-0.31749285766638974

In [7]:
# Output gradient f'(h): the derivative of the sigmoid activation at h
output_grad = sigmoid_prime(h)
output_grad

0.24969399993066344

In [8]:
# Error term (lowercase delta): the output error scaled by the output
# gradient, i.e. delta = (y - y-hat) * f'(h)
error_term = error * output_grad
error_term

-0.07927606158013766

In [9]:
# Gradient descent step: delta_w_i = learnrate * error_term * x_i, one entry
# per input. Built by iterating over x instead of indexing x[0] and x[1], so
# no input is silently left out if x has more than two features.
del_w = [learnrate * error_term * x_i for x_i in x]
# or, vectorized: del_w = learnrate * error_term * x
del_w

[-0.003963803079006883, -0.011891409237020648]

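Note: If you are wondering where the derivative of the `sigmoid` function comes from (`sigmoid_prime` above), check out the derivation in this post. The result is $\sigma'(x) = \sigma(x)\,(1 - \sigma(x))$, which is exactly what `sigmoid_prime` computes.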

In the quiz below, you'll implement gradient descent in code yourself, although with a few differences (which we'll leave to you to figure out!) from the above example.

In [10]:
import numpy as np

def sigmoid(x):
    """
    Sigmoid activation: 1 / (1 + e^(-x)).

    The exponential is taken with np.exp, so NumPy arrays are
    handled element-wise as well as plain numbers.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def sigmoid_prime(x):
    """
    Derivative of the sigmoid function

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    """
    # Compute the activation a single time; both factors reuse it rather
    # than calling sigmoid(x) twice.
    activation = sigmoid(x)
    return activation * (1 - activation)

# Quiz setup. NOTE: learnrate, x, y and h rebind names already used by the
# worked example in the earlier cells, so after this cell runs those names
# hold the quiz values, not the example's.
# Learning rate for the weight step
learnrate = 0.5
# Input data: one example with four feature values
x = np.array([1, 2, 3, 4])
# Target, stored as a zero-dimensional NumPy array rather than a plain float
y = np.array(0.5)

# Initial weights, one per entry of x
w = np.array([0.5, -0.5, 0.3, 0.1])

### Calculate one gradient descent step for each weight
### Note: Some steps have been consolidated, so there are
###       fewer variable names than in the above sample code

# Quiz step: calculate the node's linear combination of inputs and weights
# (dot product of x and w)
h = np.dot(x, w)
h

0.7999999999999998

In [11]:
# Quiz step: calculate the output of the neural network (sigmoid of h)
nn_output = sigmoid(h)
nn_output

0.6899744811276125

In [12]:
# Quiz step: calculate the error of the neural network (target minus output)
error = y - nn_output
error

-0.1899744811276125

In [13]:
# Quiz step: calculate the error term.
#       This requires the output gradient f'(h); no separate variable is
#       kept for it here, so sigmoid_prime(h) is evaluated inline.
error_term = error * sigmoid_prime(h)
error_term

-0.04063738360460799

In [15]:
# Quiz step: calculate the change in weights. Multiplying by the array x
# gives one update per weight in a single vectorized expression.
del_w = learnrate * error_term * x
del_w

array([-0.02031869, -0.04063738, -0.06095608, -0.08127477])

In [16]:
# Report the quiz results: each label on its own line, followed by its value.
print('Neural Network output:', nn_output, sep='\n')
print('Amount of Error:', error, sep='\n')
print('Change in Weights:', del_w, sep='\n')

Neural Network output:
0.6899744811276125
Amount of Error:
-0.1899744811276125
Change in Weights:
[-0.02031869 -0.04063738 -0.06095608 -0.08127477]
