In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# Define a function to create a model with a variable number of layers and neurons
def create_model(input_size, hidden_layers, neurons_per_layer, output_size):
    """Build a fully connected ReLU network as an ``nn.Sequential``.

    The network is ``hidden_layers`` repetitions of ``Linear -> ReLU`` followed
    by a final ``Linear`` that maps to ``output_size``.

    Args:
        input_size: Number of input features fed to the first linear layer.
        hidden_layers: Number of hidden ``Linear + ReLU`` pairs. ``0`` yields a
            single linear layer mapping inputs directly to outputs.
        neurons_per_layer: Width of every hidden layer.
        output_size: Number of output features of the final linear layer.

    Returns:
        An ``nn.Sequential`` containing ``2 * hidden_layers + 1`` modules.

    Raises:
        ValueError: If ``hidden_layers`` is negative.
    """
    if hidden_layers < 0:
        raise ValueError(f"hidden_layers must be >= 0, got {hidden_layers}")

    layers = []
    in_features = input_size
    # Track the running input width so the same loop handles the first hidden
    # layer (fed by the inputs) and every later one (fed by the previous layer).
    for _ in range(hidden_layers):
        layers.extend([nn.Linear(in_features, neurons_per_layer), nn.ReLU()])
        in_features = neurons_per_layer
    # With zero hidden layers in_features is still input_size, giving a plain
    # linear classifier instead of silently inserting a hidden layer.
    layers.append(nn.Linear(in_features, output_size))
    return nn.Sequential(*layers)

# Load MNIST dataset
# Preprocessing: convert each image to a tensor, then normalize its single
# channel with mean 0.1307 and std 0.3081 (presumably the MNIST training-set
# statistics -- confirm if the dataset changes).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Only the training split requests a download; the test split is opened from
# the same root directory.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
)

# Training batches are small and reshuffled; evaluation uses large batches in
# a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1000,
    shuffle=False,
)

# Function to train and evaluate a model
def train_and_evaluate(model, epochs=5, train_batches=None, test_batches=None, device=None):
    """Train ``model`` with Adam and return its classification accuracy.

    Every batch is flattened to ``(batch, -1)`` before being passed to the
    model, and the loss is ``nn.CrossEntropyLoss``.

    Args:
        model: The ``nn.Module`` to train. It is moved to ``device`` in place.
        epochs: Number of full passes over the training batches.
        train_batches: Iterable of ``(data, target)`` training batches.
            Defaults to the module-level ``train_loader``.
        test_batches: Iterable of ``(data, target)`` evaluation batches.
            Defaults to the module-level ``test_loader``.
        device: Device to run on. Defaults to CUDA when available, else CPU.

    Returns:
        Fraction of evaluated samples whose arg-max prediction equals the
        target, as a float in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If the evaluation batches contain no samples.
    """
    # Resolve the defaults lazily so callers that pass their own loaders do not
    # need the module-level ones to exist.
    if train_batches is None:
        train_batches = train_loader
    if test_batches is None:
        test_batches = test_loader
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Move the model before building the optimizer so the optimizer holds the
    # parameters that actually live on the target device.
    model.to(device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    for _ in range(epochs):
        model.train()
        for data, target in train_batches:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data.view(data.size(0), -1))
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_batches:
            data, target = data.to(device), target.to(device)
            pred = model(data.view(data.size(0), -1)).argmax(dim=1)
            correct += (pred == target).sum().item()
            # Count what was actually evaluated instead of len(dataset), which
            # over-counts when the loader drops or subsamples examples.
            total += target.size(0)

    return correct / total

# Create and evaluate models with increasing complexity
input_size = 28 * 28  # each image is flattened to 784 features before the first layer
output_size = 10      # number of classes the final layer scores
SEED = 0              # fixed seed so repeated runs of this cell are comparable
models_to_test = [
    (1, 100),    # 1 hidden layer, 100 neurons
    (5, 1000),   # 5 hidden layers, 1000 neurons each
    (20, 5000)   # 20 hidden layers, 5000 neurons each
]

# Filled in the same order as models_to_test.
accuracies = []

for hidden_layers, neurons_per_layer in models_to_test:
    # Re-seed before every configuration so weight initialization and the
    # shuffling of training batches start from the same RNG state each time;
    # otherwise results change from run to run and between configurations for
    # reasons unrelated to model size. (Some GPU kernels may still be
    # non-deterministic -- this removes the RNG-driven variation only.)
    torch.manual_seed(SEED)
    model = create_model(input_size, hidden_layers, neurons_per_layer, output_size)
    accuracy = train_and_evaluate(model)
    accuracies.append(accuracy)
    print(f"Model with {hidden_layers} hidden layers and {neurons_per_layer} neurons per layer:")
    print(f"Number of parameters: {sum(p.numel() for p in model.parameters())}")
    print(f"Test accuracy: {accuracy:.4f}\n")

# Plot results
# One point per tested configuration, in the order they were evaluated.
fig, ax = plt.subplots(figsize=(10, 6))
positions = list(range(len(models_to_test)))
tick_labels = [f"{hl} layers\n{n} neurons" for hl, n in models_to_test]

ax.plot(positions, accuracies, marker='o')
ax.set_xlabel('Model Complexity')
ax.set_ylabel('Test Accuracy')
ax.set_title('Model Performance vs Complexity')

# Label each x position with its architecture; rotate so the two-line labels
# do not overlap.
ax.set_xticks(positions)
ax.set_xticklabels(tick_labels, rotation=45)

fig.tight_layout()
plt.show()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:04<00:00, 2284506.77it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 119665.25it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:04<00:00, 363183.33it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 1522580.62it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw

Model with 1 hidden layers and 100 neurons per layer:
Number of parameters: 79510
Test accuracy: 0.9739



KeyboardInterrupt: 