# 08. Toy Problems

This notebook solves classic machine learning problems with minimal networks.

## Experiment Overview
- **Goal**: Solve classic ML problems with minimal networks
- **Model**: A small tanh MLP trained separately on each problem (XOR, circles, moons)
- **Features**: Decision boundary visualization, convergence analysis
- **Learning**: Understanding fundamental ML concepts

## What You'll Learn
- XOR problem solving
- Non-linear decision boundaries
- Circles and moons datasets
- Network capacity vs. problem complexity


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
from sklearn.datasets import make_circles, make_moons
from sklearn.model_selection import train_test_split

# Add scripts directory to path
sys.path.append('../scripts')
from utils import get_device, set_seed, plot_decision_boundary

# Seed via the project helper so repeated runs are reproducible
set_seed(42)

# Select the compute device once; tensors and models are moved onto it later
device = get_device()
print(f"Using device: {device}")

print("Generating toy datasets...")

# XOR: the four corners of the unit square, labelled 1 when exactly one
# coordinate is 1
X_xor = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y_xor = np.array([0, 1, 1, 0])

# Circles and moons are sampled with identical size, noise and seed
sampling_options = dict(n_samples=200, noise=0.1, random_state=42)
X_circles, y_circles = make_circles(**sampling_options)
X_moons, y_moons = make_moons(**sampling_options)

# Visualize the three datasets side by side, one panel per problem
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Each entry: (title, features, labels, class-0 scatter options,
# class-1 scatter options). XOR has only four points, so it gets large,
# distinct markers; the sampled datasets use translucent points so that
# overlapping samples stay visible.
dataset_panels = [
    ('XOR Problem', X_xor, y_xor,
     dict(marker='o', s=100), dict(marker='s', s=100)),
    ('Circles Problem', X_circles, y_circles,
     dict(alpha=0.6), dict(alpha=0.6)),
    ('Moons Problem', X_moons, y_moons,
     dict(alpha=0.6), dict(alpha=0.6)),
]

for panel_ax, (panel_title, features, labels, class0_style, class1_style) in zip(axes, dataset_panels):
    panel_ax.scatter(features[labels == 0, 0], features[labels == 0, 1],
                     c='red', label='Class 0', **class0_style)
    panel_ax.scatter(features[labels == 1, 0], features[labels == 1, 1],
                     c='blue', label='Class 1', **class1_style)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('X1')
    panel_ax.set_ylabel('X2')
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()

# savefig does not create missing directories, so make sure the output
# folder exists before writing (a fresh checkout may not have it).
os.makedirs('../results/plots', exist_ok=True)
plt.savefig('../results/plots/toy_datasets.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Define neural network for toy problems
class ToyProblemNN(nn.Module):
    """Small fully connected classifier for the toy problems.

    Two tanh-activated hidden layers of equal width feed a linear output
    layer that emits one raw score (logit) per class; no softmax is applied.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of each of the two hidden layers.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size=2, hidden_size=8, num_classes=2):
        super().__init__()
        # The attribute names become state_dict keys, so they must stay stable
        # for saved checkpoints to remain loadable.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        hidden = self.fc1(x).tanh()
        hidden = self.fc2(hidden).tanh()
        return self.fc3(hidden)

# Training function
def train_toy_model(model, X, y, epochs=1000, lr=0.01):
    """Train ``model`` on a toy problem with full-batch Adam.

    Each epoch performs a single optimisation step on the complete dataset
    ``(X, y)`` using cross-entropy loss. A progress line is printed every
    200 epochs.

    Args:
        model: Network whose output for ``X`` is accepted, together with
            ``y``, by ``nn.CrossEntropyLoss``.
        X: Input tensor, passed to ``model`` unchanged.
        y: Target tensor, passed to the loss unchanged.
        epochs: Number of optimisation steps to run.
        lr: Learning rate for the Adam optimiser.

    Returns:
        list[float]: The loss measured in each epoch (computed before that
        epoch's parameter update), in order.
    """
    # Enter training mode explicitly: a model that was previously evaluated
    # may still be in eval mode, which would make mode-dependent layers
    # (dropout, batch norm) behave incorrectly during training.
    model.train()

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    losses = []

    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        losses.append(loss_value)

        if (epoch + 1) % 200 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss_value:.4f}')

    return losses

# Train one freshly initialised network per problem. Features are converted
# to float tensors and labels to long (integer) tensors on `device`.
# Models are created and trained strictly in sequence so that random
# initialisation happens in the same order on every run.

# XOR problem
print("Testing XOR problem...")
X_xor_tensor = torch.FloatTensor(X_xor).to(device)
y_xor_tensor = torch.LongTensor(y_xor).to(device)

xor_model = ToyProblemNN().to(device)
xor_losses = train_toy_model(xor_model, X_xor_tensor, y_xor_tensor, epochs=1000)

# Circles problem
print("\nTesting Circles problem...")
X_circles_tensor = torch.FloatTensor(X_circles).to(device)
y_circles_tensor = torch.LongTensor(y_circles).to(device)

circles_model = ToyProblemNN().to(device)
circles_losses = train_toy_model(circles_model, X_circles_tensor, y_circles_tensor, epochs=1000)

# Moons problem
print("\nTesting Moons problem...")
X_moons_tensor = torch.FloatTensor(X_moons).to(device)
y_moons_tensor = torch.LongTensor(y_moons).to(device)

moons_model = ToyProblemNN().to(device)
moons_losses = train_toy_model(moons_model, X_moons_tensor, y_moons_tensor, epochs=1000)

# Plot the three loss curves side by side, one panel per problem
plt.figure(figsize=(15, 5))

loss_panels = [
    ('XOR Training Loss', xor_losses),
    ('Circles Training Loss', circles_losses),
    ('Moons Training Loss', moons_losses),
]
for panel_index, (panel_title, panel_losses) in enumerate(loss_panels, start=1):
    plt.subplot(1, 3, panel_index)
    plt.plot(panel_losses)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.grid(True)

plt.tight_layout()

# savefig does not create missing directories, so make sure the output
# folder exists before writing (a fresh checkout may not have it).
os.makedirs('../results/plots', exist_ok=True)
plt.savefig('../results/plots/toy_training_losses.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Evaluate models and visualize decision boundaries
def evaluate_model(model, X, y):
    """Return the fraction of samples in ``X`` that the model labels as ``y``.

    The model is switched to evaluation mode (and left in that mode), and
    the forward pass runs without gradient tracking. The predicted label of
    a sample is the index of its largest output score.
    """
    model.eval()
    with torch.no_grad():
        predicted_labels = torch.argmax(model(X), dim=1)
        hits = predicted_labels.eq(y)
        return hits.float().mean().item()

# Score each trained network on the same data it was fitted to
# (no held-out split is used in this notebook)
xor_accuracy = evaluate_model(xor_model, X_xor_tensor, y_xor_tensor)
circles_accuracy = evaluate_model(circles_model, X_circles_tensor, y_circles_tensor)
moons_accuracy = evaluate_model(moons_model, X_moons_tensor, y_moons_tensor)

# Report the three accuracies with four decimal places
for problem_name, problem_accuracy in (
    ("XOR", xor_accuracy),
    ("Circles", circles_accuracy),
    ("Moons", moons_accuracy),
):
    print(f"{problem_name} Accuracy: {problem_accuracy:.4f}")

# Create decision boundary plots
def plot_decision_boundary(model, X, y, title, ax, h=0.02):
    """Draw a model's predicted class regions behind the 2-D data on ``ax``.

    The model is evaluated on a regular grid that extends one unit beyond
    the data range in every direction; the predicted class of each grid
    point is drawn as a filled contour, and the samples are scattered on top.

    NOTE: this definition shadows the ``plot_decision_boundary`` that is
    imported from ``utils`` at the top of the file.

    Args:
        model: Classifier returning one score per class for a batch of
            2-feature inputs. It is switched to evaluation mode and left so.
        X: NumPy array of samples; columns 0 and 1 are the plotted features.
        y: NumPy array of labels; samples labelled 0 and 1 are drawn.
        title: Title for the axes.
        ax: Matplotlib axes to draw on.
        h: Grid spacing used along both features.
    """
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Put the grid on the device the model actually lives on rather than
    # relying on a module-level global; a parameter-free model is evaluated
    # on the default (CPU) device.
    first_param = next(model.parameters(), None)
    grid_device = first_param.device if first_param is not None else None
    grid_points = torch.as_tensor(np.c_[xx.ravel(), yy.ravel()],
                                  dtype=torch.float32, device=grid_device)

    # Predict a class for every grid point, without tracking gradients
    model.eval()
    with torch.no_grad():
        Z = model(grid_points).argmax(dim=1).cpu().numpy()

    # Back to the grid's 2-D layout so it can be drawn as a contour
    Z = Z.reshape(xx.shape)

    # Regions first, then the samples on top of them
    ax.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu')
    ax.scatter(X[y == 0, 0], X[y == 0, 1], c='red', marker='o', s=50, label='Class 0')
    ax.scatter(X[y == 1, 0], X[y == 1, 1], c='blue', marker='s', s=50, label='Class 1')
    ax.set_title(title)
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.legend()
    ax.grid(True)

# Plot the decision boundary of each trained model, one panel per problem,
# with the accuracy shown in the panel title
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

boundary_panels = [
    (xor_model, X_xor, y_xor, f'XOR Decision Boundary (Acc: {xor_accuracy:.3f})'),
    (circles_model, X_circles, y_circles, f'Circles Decision Boundary (Acc: {circles_accuracy:.3f})'),
    (moons_model, X_moons, y_moons, f'Moons Decision Boundary (Acc: {moons_accuracy:.3f})'),
]
for panel_ax, (panel_model, panel_X, panel_y, panel_title) in zip(axes, boundary_panels):
    plot_decision_boundary(panel_model, panel_X, panel_y, panel_title, panel_ax)

plt.tight_layout()

# savefig does not create missing directories, so make sure the output
# folder exists before writing (a fresh checkout may not have it).
os.makedirs('../results/plots', exist_ok=True)
plt.savefig('../results/plots/toy_decision_boundaries.png', dpi=300, bbox_inches='tight')
plt.show()

# Save only the learned parameters (state_dict) of each model; the target
# folder is created first for the same reason as above.
os.makedirs('../results/logs', exist_ok=True)
torch.save(xor_model.state_dict(), '../results/logs/xor_model.pth')
torch.save(circles_model.state_dict(), '../results/logs/circles_model.pth')
torch.save(moons_model.state_dict(), '../results/logs/moons_model.pth')

print("\nModels saved successfully!")
