In [1]:
# Useful starting lines
%matplotlib inline

import numpy as np
import scipy
import scipy.io
import scipy.sparse as sp
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

Implement the sigmoid activation function.

In [2]:
def sigmoid(t):
    """Apply the logistic sigmoid 1 / (1 + exp(-t)) element-wise to t.

    The value is built from exp(-|t|), which always lies in [0, 1], so large
    negative inputs cannot overflow np.exp the way a direct evaluation of
    1 / (1 + exp(-t)) does.

    Args:
        t: real scalar or array-like.

    Returns:
        The sigmoid of t: a NumPy scalar for scalar input, otherwise an
        array with the same shape as t.
    """
    t = np.asarray(t)
    # exp(-|t|) <= 1 whatever the sign of t, so this never overflows.
    e = np.exp(-np.abs(t))
    # t >= 0: 1 / (1 + exp(-t));  t < 0: exp(t) / (1 + exp(t)) -- same value.
    return np.where(t >= 0, 1.0, e) / (1.0 + e)
    
def grad_sigmoid(t):
    """Evaluate the derivative of the sigmoid at t.

    Relies on the identity sigma'(t) = sigma(t) * (1 - sigma(t)).
    """
    activation = sigmoid(t)
    return activation * (1 - activation)

Note that you are working on a three-layer neural network with one input layer of size $D=4$, $L=1$ hidden layer with size $K=5$, and one output layer with size 1.

Initialize the data.

In [3]:
# Single training sample with D = 4 input features.
x = np.array([0.01, 0.02, 0.03, 0.04])

# Every weight starts at one:
#   w_1 has shape (4, 5): input layer -> hidden layer,
#   w_2 has shape (5,):   hidden layer -> single output unit.
W = dict(
    w_1=np.ones((4, 5)),
    w_2=np.ones(5),
)

# Target value for the sample.
y = 1

# Problem 1: Feed-forward in neural network

Implement the neural network described by Equation 1 of the exercise sheet.

---

In matrix form, we write:

$$
\mathbf x^{(l)} = \phi \left(\mathbf z^{(l)} \right) = \phi \left( \left(W^{(l)}\right)^\top \mathbf x^{(l-1)} \right)$$

---

In [4]:
def simple_feed_forward(x, W):
    """Propagate x through the network and return the prediction.

    Each layer applies the sigmoid to the weighted sum of the previous
    layer's activations: first with W['w_1'] (transposed), then with W['w_2'].
    """
    w_hidden, w_out = W['w_1'], W['w_2']

    # Hidden layer activations.
    hidden = sigmoid(w_hidden.T.dot(x))

    # Output unit.
    return sigmoid(w_out.dot(hidden))

# Sanity check of simple_feed_forward against the reference value.
try:
    expected = 0.93244675427215695
    yours = simple_feed_forward(x, W)
    assert np.sum((yours - expected) ** 2) < 1e-15
    print("Your implementation is correct!")
except Exception as err:
    # Catch Exception rather than everything, so KeyboardInterrupt still
    # works, and show the cause instead of silently discarding it.
    print("Your implementation is not correct.")
    print("Reason:", repr(err))

Your implementation is correct!


# Problem 2: Backpropagation in neural network

Implement your derivation of backpropagation. 

*Hint*: You might want to slightly change `simple_feed_forward`.

---

We use the squared error as the loss function; in our case there is only a single sample:

$$\mathcal L(\mathbf w) = \frac 1 2 \left( \hat y - y \right) ^ 2$$

We are asked to compute the derivatives

$$\frac {\partial \mathcal L(\mathbf w)} {\partial w_{i,1}^{(2)}}, \quad \frac {\partial \mathcal L(\mathbf w)} {\partial w_{i,j}^{(1)}}$$

We have:

$$\frac{\partial \mathcal L}{\partial w_{i1}^{(2)}} 
= \delta_1^{(2)} x_i^{(1)}, \quad \delta_1^{(2)} = \frac{\partial \mathcal L}{\partial z_1^{(2)}}$$


and:

$$\frac{\partial \mathcal L}{\partial w_{ij}^{(1)}} 
= \delta_1^{(2)} w_{j,1}^{(2)} \, \phi' \left( z_j^{(1)} \right)  \, x_i^{(0)}$$

---

In [5]:
def simple_feed_forward(x, W):
    """Run the forward pass and also expose the pre-activations.

    Returns:
        A tuple (y_hat, z1, z2): the network output, the hidden-layer
        pre-activation and the output-unit pre-activation.
    """
    w_hidden, w_out = W['w_1'], W['w_2']

    # Hidden layer: weighted sum, then sigmoid.
    pre_hidden = w_hidden.T.dot(x)
    hidden = sigmoid(pre_hidden)

    # Output unit: weighted sum of the hidden activations, then sigmoid.
    pre_output = w_out.dot(hidden)

    return sigmoid(pre_output), pre_hidden, pre_output

In [6]:
def simple_backpropagation(y, x, W):
    """Do backpropagation and get delta_W.

    Computes the gradient of the squared-error loss
    L = 0.5 * (y_hat - y) ** 2 for the single sample (x, y) with respect to
    both weight arrays.

    Args:
        y: target value.
        x: input vector fed to the network.
        W: dict with the weights "w_1" (input -> hidden) and
            "w_2" (hidden -> output).

    Returns:
        dict with the keys "w_1" and "w_2" holding dL/dw_1 and dL/dw_2.
    """
    y_hat, z1, z2 = simple_feed_forward(x, W)

    # Output unit: delta_2 = dL/dz2 = (y_hat - y) * phi'(z2).
    delta_2 = (y_hat - y) * grad_sigmoid(z2)
    x1 = sigmoid(z1)
    delta_w_2 = delta_2 * x1

    # Hidden layer: delta_1[j] = delta_2 * w_2[j] * phi'(z1[j]).
    delta_1 = delta_2 * W['w_2'] * grad_sigmoid(z1)
    # dL/dw_1[i, j] = x[i] * delta_1[j], so the gradient is the outer product
    # with x first: that gives the same (inputs, hidden) layout as w_1.
    # The reversed product, delta_1 as a column times x as a row, is the
    # transpose (hidden, inputs) and therefore does not line up with w_1.
    delta_w_1 = np.outer(x, delta_1)

    return {
        "w_1": delta_w_1,
        "w_2": delta_w_2
    }
    
# Sanity check of simple_backpropagation against the reference gradients.
# Bind `yours` before the try block so the handler never reads an unbound or
# stale value when simple_backpropagation itself raises.
yours = None
try:
    expected = {
        'w_1': np.array([
            [ -1.06113639e-05,  -1.06113639e-05,  -1.06113639e-05, -1.06113639e-05,  -1.06113639e-05],
            [ -2.12227277e-05,  -2.12227277e-05,  -2.12227277e-05, -2.12227277e-05,  -2.12227277e-05],
            [ -3.18340916e-05,  -3.18340916e-05,  -3.18340916e-05, -3.18340916e-05,  -3.18340916e-05],
            [ -4.24454555e-05,  -4.24454555e-05,  -4.24454555e-05, -4.24454555e-05,  -4.24454555e-05]]),
        'w_2': np.array(
            [-0.00223387, -0.00223387, -0.00223387, -0.00223387, -0.00223387])
    }
    yours = simple_backpropagation(y, x, W)
    assert np.sum(
        [np.sum((yours[key] - expected[key]) ** 2)
         for key in expected.keys()]) < 1e-15
    print("Your implementation is correct!")
except Exception as err:
    # Only show the computed gradient if the call actually produced one.
    if yours is not None:
        print(yours['w_2'])
    print("Your implementation is NOT correct!")
    print("Reason:", repr(err))

Your implementation is correct!
