# Lab 2 - Feedforward Network and Backpropagation


In [92]:
# Imports
import numpy as np


In [93]:
# Sample data generation
# Training data
X = np.random.randint(2, size=[50, 2])
Z = np.array([X[:, 0] ^ X[:, 1]]).T

# Test data
X_Test = np.random.randint(2, size=[50, 2])
Z_Test = np.array([X_Test[:, 0] ^ X_Test[:, 1]]).T


In [94]:
# Initialise weights and biases
W1 = np.random.randn(3, 2)
B1 = np.random.randn(3)
W2 = np.random.randn(1, 3)
B2 = np.random.randn(1)


### Can you explain why we set the dimensions of the weights as (3, 2) and (1, 3)?

The weights were set as (3, 2) and (1, 3) because the data now needs to train/test a hidden layer of 3 neurons and one final neuron, as opposed to simply one as before.


In [95]:
# Activation Function

def sigm(X, W, B):
    """
    Sigmoid Function, a type of activation function
    X = inputs
    W = weights
    B = bias
    """
    M = 1/(1 + np.exp(-(X.dot(W.T) + B)))
    return M


def Forward(X, W1, B1, W2, B2):

    # First Layer
    H = sigm(X, W1, B1)

    # Second Layer
    Y = sigm(H, W2, B2)

    # Return the final output and the output from the final layer
    return Y, H


Due to the presence of the hidden layer, we need to use the backpropagation algorithm to update the weights.

$$
E=(z-y)^2\\
w_i'=w_i+\eta\frac{dE}{dw_i}\\
b'=b+\eta\frac{dE}{db}
$$


In [96]:
# Update rules for weights and bias

def diff_B2(Z, Y):
    # Calculate the derivative of B
    dB = (Z - Y) * Y * (1 - Y)

    return dB.sum(axis=0)


def diff_W2(H, Z, Y):
    dW = (Z - Y) * Y * (1 - Y)
    return H.T.dot(dW)


def diff_W1(X, H, Z, Y, W2):
    dZ = (Z - Y).dot(W2) * Y * (1 - Y) * H * (1 - H)
    return X.T.dot(dZ)


def diff_B1(Z, Y, W2, H):
    return ((Z - Y).dot(W2) * Y * (1 - Y) * H * (1 - H)).sum(axis=0)


In [97]:
# Learning process
learning_rate = 1e-2

for epoch in range(10000):
    Y, H = Forward(X, W1, B1, W2, B2)

    W2 += learning_rate * diff_W2(H, Z, Y).T
    B2 += learning_rate * diff_B2(Z, Y)
    W1 += learning_rate * diff_W1(X, H, Z, Y, W2).T
    B1 += learning_rate * diff_B1(Z, Y, W2, H)
    if not epoch % 50:
        Accuracy = 1 - np.mean((Z - Y)**2)
        print(f"Epoch: {epoch}, Accuracy: {Accuracy}")


Epoch: 0, Accuracy: 0.5430394324536787
Epoch: 50, Accuracy: 0.7425259099607534
Epoch: 100, Accuracy: 0.754459128236867
Epoch: 150, Accuracy: 0.758340072994752
Epoch: 200, Accuracy: 0.7619051277189562
Epoch: 250, Accuracy: 0.7652133977461756
Epoch: 300, Accuracy: 0.7683059604791087
Epoch: 350, Accuracy: 0.7712490550791076
Epoch: 400, Accuracy: 0.7741305803989659
Epoch: 450, Accuracy: 0.7770433848372265
Epoch: 500, Accuracy: 0.7800694274685428
Epoch: 550, Accuracy: 0.7832703517981404
Epoch: 600, Accuracy: 0.7866839120605373
Epoch: 650, Accuracy: 0.7903242782076466
Epoch: 700, Accuracy: 0.7941848748606266
Epoch: 750, Accuracy: 0.7982430831308162
Epoch: 800, Accuracy: 0.8024663282391316
Epoch: 850, Accuracy: 0.8068189671687829
Epoch: 900, Accuracy: 0.811269271774087
Epoch: 950, Accuracy: 0.8157958385658917
Epoch: 1000, Accuracy: 0.8203929258284444
Epoch: 1050, Accuracy: 0.8250743531226459
Epoch: 1100, Accuracy: 0.8298754800893309
Epoch: 1150, Accuracy: 0.8348523121700382
Epoch: 1200, Accur

In [98]:
# Testing
X_Test = np.random.randint(2, size=[50, 2])
Z_Test = np.array([X_Test[:, 0] ^ X_Test[:, 1]]).T
Y_Test, H = Forward(X_Test, W1, B1, W2, B2)
Accuracy = 1 - np.mean((Z_Test - Y_Test)**2)

print('Testing Accuracy: ', Accuracy)


Testing Accuracy:  0.9987279548265086
