# Multilayer Perceptrons (MLP)

In [1]:
import numpy as np

In [2]:
# XOR truth table: every combination of two binary inputs, one pair per row.
X = np.array([[a, b] for a in (0., 1.) for b in (0., 1.)])
# The target is 1.0 exactly when the two inputs differ, otherwise 0.0.
y = np.array([float(a != b) for a, b in X])

In [3]:
class XORNetwork:
    """One-hidden-layer ReLU perceptron with a single linear output unit.

    The forward pass for one input vector ``x`` is::

        h     = W.T @ x + c      # hidden pre-activation
        a     = max(h, 0)        # ReLU
        y_hat = w . a + b        # linear output, shape (1,)

    Gradients are derived by hand in :meth:`backwards` from the values cached
    by the most recent forward call and applied by :meth:`optim_step`.
    """

    def __init__(self, n_inputs: int = 2, n_hidden: int = 4):
        """Initialise every parameter from a standard normal distribution.

        Args:
            n_inputs: Length of each input vector. Defaults to 2.
            n_hidden: Number of hidden ReLU units. Defaults to 4.
        """
        # Parameters. The draw order (W, c, w, b) is kept fixed so that a
        # given ``np.random.seed`` always yields the same initial weights.
        self.W = np.random.randn(n_inputs, n_hidden)  # input -> hidden weights
        self.c = np.random.randn(n_hidden)            # hidden biases
        self.w = np.random.randn(n_hidden)            # hidden -> output weights
        self.b = np.random.randn(1)                   # output bias

    def __call__(self, x: np.ndarray):
        """Run the forward pass on a single sample and cache intermediates.

        Args:
            x: One input vector of length ``n_inputs``.

        Returns:
            The prediction as an array of shape ``(1,)``.
        """
        # x, h and a are stored because backwards() needs them.
        self.x = x
        self.h = self.W.T @ x + self.c
        self.a = np.maximum(self.h, 0)
        return np.dot(self.w, self.a) + self.b

    def backwards(self, loss_grad: "float | np.ndarray") -> None:
        """Backpropagate the loss gradient and store the parameter gradients.

        Uses the ``x``, ``h`` and ``a`` cached by the latest forward call, so a
        forward pass must have been run first.

        Args:
            loss_grad: Derivative of the loss with respect to the network
                output; a scalar or a shape-``(1,)`` array.
        """
        # Output layer: y_hat = w . a + b
        self.w_grad = loss_grad * self.a
        self.b_grad = loss_grad
        # Backprop through linear output
        hidden_grad = self.w * loss_grad
        # Backprop through ReLU: only units with h >= 0 pass gradient on.
        hidden_grad = hidden_grad * (self.h >= 0)
        # Hidden layer: h = W.T @ x + c
        self.W_grad = np.outer(self.x, hidden_grad)
        self.c_grad = hidden_grad

    def optim_step(self, lr=0.001):
        """Apply one in-place gradient-descent update to every parameter.

        Requires gradients from a preceding :meth:`backwards` call.

        Args:
            lr: Learning rate multiplying each stored gradient.
        """
        self.W -= self.W_grad * lr
        self.c -= self.c_grad * lr
        self.w -= self.w_grad * lr
        self.b -= self.b_grad * lr

def mse(y, y_hat):
    """Return the element-wise squared error ``(y - y_hat) ** 2``.

    Despite the name, no mean is taken here; the result has the broadcast
    shape of the inputs and callers do any averaging or summing themselves.

    Args:
        y: Target value(s), a scalar or NumPy array.
        y_hat: Predicted value(s), broadcastable against ``y``.

    Returns:
        The squared difference, as a NumPy scalar or array.
    """
    # np.square is available in every NumPy release, whereas the np.pow
    # alias only exists from NumPy 2.0 onwards.
    return np.square(y - y_hat)

def mse_grad(y, y_hat):
    """Return the derivative of ``(y - y_hat) ** 2`` with respect to ``y_hat``."""
    residual = y - y_hat
    return residual * -2

In [4]:
# Baseline: predictions from a freshly initialised (untrained) network
np.random.seed(1)
xor_model = XORNetwork()
y_hat = np.array([xor_model(sample).item() for sample in X])
for inputs, target, pred in zip(X, y, y_hat):
    print(f'x={inputs} y={target} y_hat={pred:.5f}')
# The reported loss is the squared error summed over all samples.
print(f'loss = {np.sum(mse(y, y_hat)):.5f}')

x=[0. 0.] y=0.0 y_hat=1.38240
x=[0. 1.] y=1.0 y_hat=3.08159
x=[1. 0.] y=1.0 y_hat=0.25986
x=[1. 1.] y=0.0 y_hat=1.95905
loss = 10.62976


In [5]:
# Same seed as the untrained-prediction cell, so training starts from those weights.
np.random.seed(1)
xor_model = XORNetwork()

# Hyper-parameters for training the XOR network
epilson = 1e-6       # stop once the epoch loss is at or below this value
lr = 0.05            # initial learning rate; multiplied by 0.997 after each epoch
max_epochs = 10000

for epoch in range(max_epochs):
    # The model is updated after every single sample; each sample's squared
    # error (measured before its update) enters the epoch loss with weight 0.25.
    loss = 0
    for xi, yi in zip(X, y):
        yi_hat = xor_model(xi)
        loss_grad = mse_grad(yi, yi_hat)
        loss += 0.25 * mse(yi, yi_hat)
        xor_model.backwards(loss_grad)
        xor_model.optim_step(lr)

    loss_value = loss.item()
    if epoch % 100 == 0:
        print(f'Epoch {epoch} | loss = {loss_value:.8f}')
    if loss_value <= epilson:
        print(f'Converged at epoch {epoch} | loss = {loss_value:.8f}')
        break
    lr = lr * 0.997
else:
    # Reached only when the loop finishes without hitting the break above.
    print(f'Failed to converge | loss = {loss.item():.8f}')

Epoch 0 | loss = 2.30893782
Epoch 100 | loss = 0.15412496
Epoch 200 | loss = 0.02602696
Epoch 300 | loss = 0.00119032
Epoch 400 | loss = 0.00009959
Epoch 500 | loss = 0.00001583
Epoch 600 | loss = 0.00000411
Epoch 700 | loss = 0.00000153
Converged at epoch 755 | loss = 0.00000099


In [6]:
# Predictions of the current xor_model on every row of X
y_hat = np.array([xor_model(sample).item() for sample in X])
for inputs, target, pred in zip(X, y, y_hat):
    print(f'x={inputs} y={target} y_hat={pred:.5f}')
# The reported loss is the squared error summed over all samples.
print(f'loss = {np.sum(mse(y, y_hat)):.5f}')

x=[0. 0.] y=0.0 y_hat=0.00156
x=[0. 1.] y=1.0 y_hat=0.99927
x=[1. 0.] y=1.0 y_hat=0.99916
x=[1. 1.] y=0.0 y_hat=0.00036
loss = 0.00000
