# Multi-Layer Perceptron for the XOR problem in PyTorch

Markus Enzweiler, markus.enzweiler@hs-esslingen.de

This is a demo used in a Computer Vision & Machine Learning lecture. Feel free to use and contribute.

We build and train a multi-layer perceptron to act as a simple XOR gate with two inputs and one output. 
XOR gates have the following behavior:

- If both inputs are identical, the output is 0 (off).
- If the two inputs differ, the output is 1 (on).

| observation # | input 1 | input 2 | output |
|---------------|---------|---------|--------|
| 0             | 0       | 0       | 0      |
| 1             | 0       | 1       | 1      |
| 2             | 1       | 0       | 1      |
| 3             | 1       | 1       | 0      |



In this demonstration, we replace our custom Perceptron class from the previous notebooks with building blocks from `torch.nn`.

See https://pytorch.org/docs/stable/nn.html and in particular:
- https://pytorch.org/docs/stable/generated/torch.nn.Module.html  
- https://pytorch.org/docs/stable/generated/torch.nn.Linear.html 

## Setup

Adapt `package_path` to point to the directory containing this notebook.

In [None]:
# Imports
import sys
import os

In [None]:
# Additional imports

# Repository Root: two directory levels above the current working directory
repo_root = os.path.abspath(os.path.join("..", ".."))
# Add the repository root to the system path so the package imports below
# can be resolved. The membership check keeps sys.path free of duplicate
# entries when this cell is executed more than once.
if repo_root not in sys.path:
    sys.path.append(repo_root)

# Package Imports
from nbutils import requirements as nb_reqs
from nbutils import colab as nb_clab
from nbutils import git as nb_git
from nbutils import exec as nb_exec

In [None]:
# Directory of this notebook's package, given relative to the working
# directory (local run)
package_path = "./"
print("Package path:", package_path)

In [None]:
# Additional requirements for this notebook
# The requirements file is expected next to this notebook, i.e. inside package_path.
req_file = os.path.join(package_path, "requirements.txt")
# NOTE(review): presumably installs the packages listed in req_file via pip;
# confirm against nbutils.requirements.
nb_reqs.pip_install_reqs(req_file)    

In [None]:
# Now we should be able to import the additional packages
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# Seed the PyTorch and NumPy random number generators with the same fixed
# value so that repeated runs produce the same results
torch.manual_seed(42)
np.random.seed(42)


## Create the training data

In [None]:
# XOR truth table, first assembled as numpy arrays:
# each row of X is one input pair, Y holds the matching output
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([0, 1, 1, 0])

# Turn both numpy arrays into float32 PyTorch tensors
X, Y = (torch.tensor(array, dtype=torch.float32) for array in (X, Y))

# Show every sample next to its label, one pair per line
print("Training data X with labels y:")
print(*(f"{sample} -> {label}" for sample, label in zip(X, Y)), sep="\n")

# Define the Multi-Layer Perceptron (MLP)

## MLP class

In [None]:
# Multi-layer perceptron model
class MultiLayerPerceptron(nn.Module):
    """Perceptron with one hidden layer and a single sigmoid output.

    The network computes ``sigmoid(layer2(sigmoid(layer1(x))))``.

    Args:
        num_inputs: Number of input features.
        num_hidden_layer_neurons: Number of neurons in the hidden layer
            (default: 2).
    """

    def __init__(self, num_inputs, num_hidden_layer_neurons=2):
        super().__init__()

        # layer 1 defines the transformation from input to hidden layer
        self.layer1 = nn.Linear(num_inputs, num_hidden_layer_neurons)
        # layer 2 defines the transformation from hidden layer to output
        self.layer2 = nn.Linear(num_hidden_layer_neurons, 1)
        # sigmoid activation function (shared by both layers)
        self.sigmoid = nn.Sigmoid()

    # NOTE: __call__ is intentionally not overridden. nn.Module.__call__
    # already dispatches to forward() and additionally runs registered
    # forward / pre-forward hooks, which a custom __call__ would bypass.

    def forward(self, x):
        """Return the network output for input ``x``.

        Args:
            x: Input tensor whose last dimension has ``num_inputs`` entries.

        Returns:
            Tensor with a last dimension of size 1 and values in (0, 1).
        """
        # x (input) -> hidden layer -> sigmoid -> output layer -> sigmoid
        x = self.sigmoid(self.layer1(x))
        x = self.sigmoid(self.layer2(x))
        return x
    

# MLP training with gradient descent

## Training and testing functions

In [None]:
# Training function
def train(model, X, Y, optimizer, loss_fn, num_epochs):
    """Train ``model`` sample by sample for ``num_epochs`` epochs.

    In every epoch each sample of ``X`` is passed through the model on its
    own, the loss against the matching entry of ``Y`` is back-propagated and
    the optimizer performs one update step. The mean loss of an epoch is
    printed roughly ten times over the whole run and after the final epoch.

    Args:
        model: Callable that maps one sample ``X[i]`` to a prediction.
        X: Indexable collection of training samples.
        Y: Tensor of labels; ``Y[i]`` belongs to ``X[i]``.
        optimizer: Optimizer holding the model parameters; must provide
            ``step()`` and ``zero_grad()``.
        loss_fn: Callable ``loss_fn(prediction, label)`` returning a scalar
            tensor.
        num_epochs: Number of passes over the training data.
    """
    # Print about ten progress lines. num_epochs // 10 is 0 for fewer than
    # ten epochs, which would make the modulo below divide by zero, so the
    # interval is clamped to at least one epoch.
    log_interval = max(1, num_epochs // 10)
    num_samples = len(X)

    # Loop over epochs
    for epoch in range(num_epochs):

        # Reset accumulated loss per epoch
        acc_loss = 0.0

        # Loop over all training data
        for i, x in enumerate(X):

            # label with an added leading dimension
            y = Y[i].unsqueeze(0)

            # forward pass
            y_hat = model(x)

            # backward pass
            loss = loss_fn(y_hat, y)
            loss.backward()

            # accumulate the loss as a plain Python float; adding the tensor
            # itself would keep autograd history attached to the running sum
            acc_loss += loss.item()

            # Update weights and bias
            optimizer.step()
            optimizer.zero_grad()

        # Print the mean loss per sample once in a while
        if epoch % log_interval == 0 or epoch == num_epochs - 1:
            mean_loss = acc_loss / num_samples if num_samples else 0.0
            print(f"Epoch {epoch:5d}: loss = {mean_loss:.5f}")

In [None]:
# Testing function
def test(model, X, Y):
    """Print the model's prediction for every sample next to its label.

    Args:
        model: Callable that maps one sample ``X[i]`` to a prediction.
        X: Indexable collection of samples.
        Y: Indexable collection of labels; ``Y[i]`` belongs to ``X[i]``.
    """
    # test the perceptron on all data points
    print("Testing ...")
    # Pure inference: switch off gradient tracking so no autograd graph is
    # built and the printed predictions carry no grad_fn information.
    with torch.no_grad():
        for i, x in enumerate(X):
            prediction = model(x)
            print(f"{x} -> {prediction} (label: {Y[i]})")

## Train and test

In [None]:
# Configure, train and evaluate the multi-layer perceptron

# Hyperparameters: SGD learning rate and number of passes over the data
eta = 0.25
num_epochs = 10000

# Network with two inputs; the hidden layer keeps its default size
model = MultiLayerPerceptron(num_inputs=2)

# L2 (mean squared error) loss as provided by PyTorch
loss_fn = nn.MSELoss()

# Stochastic gradient descent (SGD) over all parameters of the model
optimizer = torch.optim.SGD(model.parameters(), lr=eta)

# Fit the model to the training data ...
train(model, X, Y, optimizer=optimizer, loss_fn=loss_fn, num_epochs=num_epochs)

# ... and print its predictions for the same data
test(model, X, Y)

# Visualize decision boundary

In [None]:
import matplotlib.cm as cm
import matplotlib.gridspec as gridspec


def show_decision_boundary(model, data, labels, subplot_spec=None):
    """Plot the model output over the input plane together with the samples.

    The main axes show a filled contour plot of the model output, evaluated
    on a dense grid that extends one unit beyond the data range in both
    directions, overlaid with a scatter plot of the samples (one color per
    class). A narrow second axes holds the colorbar.

    Args:
        model: Callable that maps a float32 tensor of shape (N, 2) to one
            output value per row.
        data: Tensor of samples; columns 0 and 1 are used as plot coordinates.
        labels: Tensor with one class label per sample.
        subplot_spec: Optional subplot spec to draw into. If ``None``, a new
            1x2 ``GridSpec`` is created.
    """
    data = data.numpy()
    # Flatten once; both the class lookup and the per-class masks use 1D labels
    labels = labels.numpy().flatten()

    # Wide axes for the plot, narrow axes for the colorbar
    wratio = (15, 1)
    if subplot_spec is None:
        gs = gridspec.GridSpec(1, 2, width_ratios=wratio)
    else:
        gs = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=subplot_spec, width_ratios=wratio)

    ax = plt.subplot(gs[0])
    ax.set_title('Dataset and Decision Function')

    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
    h = 0.01  # Reduced step size for higher resolution
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Evaluate the model on all grid points at once. Gradients are not needed
    # for plotting, so no autograd graph is built for the (large) grid.
    grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)
    with torch.no_grad():
        Z = model(grid)
    Z = Z.reshape(xx.shape).detach().numpy()

    # Increase the number of levels for smoother color transitions
    levels = np.linspace(0, 1, 100)
    ctr = ax.contourf(xx, yy, Z, levels, cmap=cm.gray, vmin=0, vmax=1)

    # Define colors for each class; reused cyclically when there are more
    # classes than colors
    colors = ['red', 'blue']
    for i, yi in enumerate(np.unique(labels)):
        mask = labels == yi
        ax.scatter(data[mask, 0], data[mask, 1],
                   color=colors[i % len(colors)], linewidth=0,
                   label='Class %d (y=%d)' % (yi, yi))
    ax.legend()
    ax.set_xlim((x_min, x_max))
    ax.set_ylim((y_min, y_max))

    # Create colorbar
    cbar = plt.colorbar(ctr, cax=plt.subplot(gs[1]))
    cbar.set_ticks(np.arange(0, 1.1, 0.1))  # Set ticks from 0 to 1 with 0.1 increments
    cbar.set_label('Decision value')

In [None]:
# Draw the trained model's decision function together with the training samples
show_decision_boundary(model=model, data=X, labels=Y)