# MLP from Scratch Implementation

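This notebook demonstrates a multi-layer perceptron (MLP) implemented from scratch using only NumPy, and trains it on the XOR problem as a validation test. The model functions are imported from the `mlp_from_scratch` module; Matplotlib is used only for plotting.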


In [None]:
# Numerical arrays and plotting
import numpy as np
import matplotlib.pyplot as plt
# Standard-library helpers used only to extend the import path below
import sys
import os

# Add src to path: resolves to "<parent of the current working directory>/src"
# so that `mlp_from_scratch` can be imported.
# NOTE(review): this depends on the kernel's working directory at run time;
# presumably the notebook lives in a folder that sits next to `src` — confirm.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

# Building blocks of the network, implemented in the project's own module
from mlp_from_scratch import forward_propagation, compute_loss, backward_propagation, update_parameters


In [None]:
# XOR dataset: every combination of two binary inputs, one sample per row
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# Targets as a column vector: bitwise XOR of the two input columns -> [[0], [1], [1], [0]]
y = X[:, :1] ^ X[:, 1:]

# Show the raw arrays
print("XOR Dataset:", "Input (X):", sep="\n")
print(X)
print("\nTarget (y):")
print(y.ravel())

# Show the same data as a human-readable truth table
print("\nXOR Truth Table:", "Input 1 | Input 2 | Output", "--------|---------|-------", sep="\n")
for (first_input, second_input), (target,) in zip(X, y):
    print(f"   {first_input}    |    {second_input}    |   {target}")


In [None]:
# Network shape: 2 inputs -> 3 hidden units -> 1 output
input_size, hidden_size, output_size = 2, 3, 1

# Optimisation hyperparameters
# NOTE(review): a 0.01 step for 1000 full-batch epochs may be too small for XOR
# to converge — confirm against the training output before relying on it.
learning_rate = 0.01
num_epochs = 1000

# Seed NumPy's global generator so the initial weights are reproducible.
# The weight draws must stay in this order (W1, then W2) to yield the same values.
np.random.seed(42)
W1 = 0.1 * np.random.randn(input_size, hidden_size)
W2 = 0.1 * np.random.randn(hidden_size, output_size)

# Biases start at zero, stored as row vectors
b1 = np.zeros((1, hidden_size))
b2 = np.zeros((1, output_size))

# Summarise the configuration
print(
    "Model Architecture:",
    f"Input layer: {input_size} neurons",
    f"Hidden layer: {hidden_size} neurons",
    f"Output layer: {output_size} neuron",
    f"Learning rate: {learning_rate}",
    f"Number of epochs: {num_epochs}",
    sep="\n",
)

# Summarise the parameter shapes
print(
    f"\nInitial weights W1 shape: {W1.shape}",
    f"Initial bias b1 shape: {b1.shape}",
    f"Initial weights W2 shape: {W2.shape}",
    f"Initial bias b2 shape: {b2.shape}",
    sep="\n",
)


In [None]:
# Training loop: full-batch gradient steps on the four XOR samples
print("Training MLP on XOR problem...", "Epoch\tLoss\t\tPredictions", "-" * 40, sep="\n")

# Per-epoch history, read later by the plotting cell
losses, predictions_history = [], []

for epoch in range(num_epochs):
    # Forward pass with the current parameters, then score it
    p_i, cache = forward_propagation(X, W1, b1, W2, b2)
    loss = compute_loss(y, p_i)
    losses.append(loss)

    # Backward pass, then rebind all four parameters to their updated values
    gradients = backward_propagation(X, y, cache, W2)
    W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, gradients, learning_rate)

    # Threshold the pre-update outputs at 0.5 to get hard 0/1 predictions
    predictions = (p_i > 0.5).astype(int)
    epoch_predictions = predictions.flatten()
    predictions_history.append(epoch_predictions)

    # Report every 200th epoch, starting with epoch 0
    is_report_epoch = epoch % 200 == 0
    if is_report_epoch:
        print(f"{epoch}\t{loss:.4f}\t\t{epoch_predictions}")

print(f"\nTraining completed after {num_epochs} epochs!")


In [None]:
# Final evaluation: score the parameters left behind by the last update step.
# The returned cache is not used here.
p_i, _cache = forward_propagation(X, W1, b1, W2, b2)

# Recompute the loss from this forward pass. `losses[-1]` was measured *before*
# the final parameter update, so reporting it next to predictions made with the
# updated parameters would mix two different parameter states (and it would
# raise IndexError if no epoch was run).
final_loss = compute_loss(y, p_i)

# Threshold at 0.5 to turn outputs into hard 0/1 predictions
final_predictions = (p_i > 0.5).astype(int)
matches = final_predictions == y
accuracy = np.mean(matches) * 100

print("Final Results:")
print(f"Loss: {final_loss:.4f}")
print(f"Accuracy: {accuracy:.1f}%")
print(f"Predictions: {final_predictions.flatten()}")
print(f"True labels: {y.flatten()}")

# Check if XOR problem is solved: every prediction must match its label.
# Testing the boolean matches directly avoids an exact float comparison.
if bool(np.all(matches)):
    print("\n✅ XOR problem successfully solved!")
else:
    print(f"\n❌ XOR problem not fully solved. Accuracy: {accuracy:.1f}%")


In [None]:
# Visualize training progress: loss curve (left) and per-sample predictions (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Plot training loss
ax1.plot(losses, linewidth=2, color='blue')
ax1.set_title('Training Loss Over Time', fontsize=14, fontweight='bold')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.grid(True, alpha=0.3)
ax1.set_yscale('log')  # Log scale for better visualization

# Plot predictions evolution: one row per epoch, one column per training sample
predictions_array = np.array(predictions_history)
# Iterate over the dataset itself rather than a hard-coded count of 4, so the
# number of curves and their legend labels always follow X.
for i, sample in enumerate(X):
    ax2.plot(predictions_array[:, i], label=f'Input {sample}', linewidth=2, marker='o', markersize=3)
ax2.set_title('Predictions Evolution', fontsize=14, fontweight='bold')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Prediction (0 or 1)')
ax2.set_yticks([0, 1])
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
