In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Hyperparameters
batch_size = 64
learning_rate = 1e-3
input_size = 28 * 28
hidden_sizes = [64, 64, 64, 64, 64, 64]
output_size = 10
num_epochs = 10

# Dataset and DataLoader
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="./data", train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# MLP Model
class MLP(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLP, self).__init__()
        layers = []
        layers.append(nn.Linear(input_size, hidden_sizes[0], bias=False))
        layers.append(nn.ReLU())
        for i in range(len(hidden_sizes) - 1):
            layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1], bias=False))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_sizes[-1], output_size, bias=False))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Initialize model, criterion, and optimizer
model = MLP(input_size=input_size, hidden_sizes=hidden_sizes, output_size=output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
def train(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    correct = 0
    for images, labels in loader:
        images = images.view(-1, 28 * 28).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        correct += (outputs.argmax(1) == labels).sum().item()

    accuracy = 100 * correct / len(loader.dataset)
    print(f"Train Loss: {total_loss:.4f}, Accuracy: {accuracy:.2f}%")

# Evaluation loop
def evaluate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.view(-1, 28 * 28).to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()

    accuracy = 100 * correct / len(loader.dataset)
    print(f"Test Loss: {total_loss:.4f}, Accuracy: {accuracy:.2f}%")

In [10]:
# Training and testing
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    train(model, train_loader, optimizer, criterion, device)
    evaluate(model, test_loader, criterion, device)

# Saving the model (optional)
torch.save(model.state_dict(), "mlp_mnist.pth")

Epoch 1/10
Train Loss: 562.9444, Accuracy: 80.56%
Test Loss: 57.0740, Accuracy: 88.62%
Epoch 2/10
Train Loss: 241.2049, Accuracy: 92.25%
Test Loss: 31.5429, Accuracy: 93.93%
Epoch 3/10
Train Loss: 177.8019, Accuracy: 94.23%
Test Loss: 24.3012, Accuracy: 95.23%
Epoch 4/10
Train Loss: 147.0865, Accuracy: 95.19%
Test Loss: 22.1204, Accuracy: 95.87%
Epoch 5/10
Train Loss: 124.4649, Accuracy: 95.99%
Test Loss: 27.7812, Accuracy: 94.68%
Epoch 6/10
Train Loss: 114.4069, Accuracy: 96.29%
Test Loss: 20.1599, Accuracy: 96.21%
Epoch 7/10
Train Loss: 103.7015, Accuracy: 96.67%
Test Loss: 19.2384, Accuracy: 96.31%
Epoch 8/10
Train Loss: 92.0813, Accuracy: 97.03%
Test Loss: 20.7839, Accuracy: 96.03%
Epoch 9/10
Train Loss: 90.6104, Accuracy: 97.02%
Test Loss: 16.4357, Accuracy: 96.92%
Epoch 10/10
Train Loss: 80.1653, Accuracy: 97.37%
Test Loss: 17.3013, Accuracy: 96.67%
