In [3]:
import gzip
import pickle
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader

# Load MNIST data from mnist.pkl.gz
def load_data(path='mnist.pkl.gz'):
    """Load the train/validation/test splits from a gzipped pickle file.

    The file must unpickle to three ``(inputs, labels)`` pairs, in the order
    train, validation, test.

    Args:
        path: Location of the gzipped pickle. Defaults to ``'mnist.pkl.gz'``
            relative to the current working directory.

    Returns:
        A ``(train, val, test)`` tuple of ``TensorDataset`` objects. In each
        dataset the inputs are ``float32`` tensors and the labels are
        ``long`` tensors.

    Raises:
        OSError: If ``path`` cannot be opened or is not valid gzip data.
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file,
    # so only load archives that come from a trusted source.
    with gzip.open(path, 'rb') as f:
        # encoding='latin1' is the setting the pickle docs call for when the
        # payload holds NumPy arrays that were pickled under Python 2.
        train_set, val_set, test_set = pickle.load(f, encoding='latin1')

    def to_tensor_dataset(data):
        # data[0] holds the inputs, data[1] the matching class labels.
        x = torch.tensor(data[0], dtype=torch.float32)
        y = torch.tensor(data[1], dtype=torch.long)
        return TensorDataset(x, y)

    return tuple(
        to_tensor_dataset(split) for split in (train_set, val_set, test_set)
    )

# Initialize parameters (weights and biases) for a simple 3-layer MLP
def init_params(layer_sizes=(784, 128, 64, 10), scale=0.01):
    """Create trainable weights and biases for a fully connected network.

    Args:
        layer_sizes: Width of every layer, from input to output. The default
            ``(784, 128, 64, 10)`` yields three weight/bias pairs.
        scale: Factor applied to the standard-normal weight samples.

    Returns:
        A flat list ``[W1, b1, W2, b2, ...]``. Each weight has shape
        ``(fan_in, fan_out)`` and each bias has shape ``(fan_out,)``; biases
        start at zero. Every tensor is a leaf with ``requires_grad=True``.

    Raises:
        ValueError: If fewer than two layer sizes are given.
    """
    if len(layer_sizes) < 2:
        raise ValueError("layer_sizes needs at least an input and an output size")

    params = []
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        # The scaled sample does not track gradients, so it is already a leaf
        # tensor and can be switched to trainable in place without copying.
        weight = (torch.randn(fan_in, fan_out) * scale).requires_grad_()
        bias = torch.zeros(fan_out, requires_grad=True)
        params.extend((weight, bias))
    return params
# Forward pass through the network
def forward(x, params):
    """Run ``x`` through a stack of affine layers with ReLU in between.

    Args:
        x: Input tensor whose last dimension matches the row count of the
            first weight matrix.
        params: Flat sequence ``[W1, b1, W2, b2, ...]`` of weight/bias pairs.
            Any number of layers is accepted.

    Returns:
        The output of the last affine layer. No activation is applied to it,
        so the values are raw scores (logits).

    Raises:
        ValueError: If ``params`` is empty or has an odd number of entries.
    """
    params = list(params)
    if not params or len(params) % 2:
        raise ValueError(
            "params must be a non-empty flat sequence of (weight, bias) pairs"
        )

    last_layer = len(params) // 2 - 1
    for layer, (weight, bias) in enumerate(zip(params[::2], params[1::2])):
        x = x @ weight + bias
        # Hidden layers are followed by ReLU; the output layer is left linear.
        if layer < last_layer:
            x = F.relu(x)
    return x

# Accuracy computation
def accuracy(output, target):
    """Return the fraction of rows whose top-scoring class equals the target.

    Args:
        output: Score tensor; the predicted class is the argmax along dim 1.
        target: Tensor of expected class indices, compared element-wise with
            the predictions.

    Returns:
        The mean of the per-row hit indicators as a Python float.
    """
    predicted_classes = torch.argmax(output, dim=1)
    hits = predicted_classes == target
    hit_rate = hits.float().mean()
    return hit_rate.item()

# Training loop
def train(train_loader, val_loader, params, epochs=10, lr=0.1):
    """Fit ``params`` in place with SGD on a cross-entropy objective.

    After each epoch the sum of that epoch's mini-batch losses is printed and
    ``eval_model`` reports accuracy on ``val_loader``.

    Args:
        train_loader: Iterable of ``(inputs, labels)`` mini-batches to fit on.
        val_loader: Iterable of mini-batches handed to ``eval_model``.
        params: Tensors to optimise; they are also passed to ``forward``.
        epochs: Number of full passes over ``train_loader``.
        lr: Learning rate given to the SGD optimizer.
    """
    sgd = optim.SGD(params, lr=lr)

    for epoch in range(epochs):
        total_loss = 0.0
        for inputs, labels in train_loader:
            batch_loss = F.cross_entropy(forward(inputs, params), labels)
            # Drop gradients from the previous step before back-propagating.
            sgd.zero_grad()
            batch_loss.backward()
            sgd.step()
            total_loss += batch_loss.item()

        # The reported loss is the sum over batches, not a per-batch average.
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
        eval_model(val_loader, params, name="Validation")

# Evaluation function
def eval_model(loader, params, name="Test"):
    """Measure and print classification accuracy over a data loader.

    Args:
        loader: Iterable of ``(inputs, labels)`` mini-batches.
        params: Network parameters passed through to ``forward``.
        name: Label used as the prefix of the printed line.

    Returns:
        The accuracy as a percentage, or ``nan`` when the loader yields no
        samples (in which case an "n/a" line is printed instead of a number).
    """
    correct = 0
    total = 0
    # Gradients are not needed for scoring, so skip building the graph.
    with torch.no_grad():
        for x, y in loader:
            pred = forward(x, params).argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)

    # An empty loader would otherwise divide by zero below.
    if total == 0:
        print(f"{name} Accuracy: n/a (no samples)")
        return float("nan")

    acc = 100.0 * correct / total
    print(f"{name} Accuracy: {acc:.2f}%")
    return acc


# Load the three splits and wrap each in a mini-batch loader; only the
# training loader is built with shuffle=True.
train_data, val_data, test_data = load_data()
train_loader = DataLoader(train_data, shuffle=True, batch_size=64)
val_loader, test_loader = (
    DataLoader(held_out, batch_size=64) for held_out in (val_data, test_data)
)

# Train from a fresh random initialisation, then score the test split.
params = init_params()
train(train_loader, val_loader, params, lr=0.1, epochs=10)
eval_model(test_loader, params, "Test")


Epoch 1, Loss: 1192.4474
Validation Accuracy: 84.54%
Epoch 2, Loss: 286.4903
Validation Accuracy: 92.73%
Epoch 3, Loss: 172.7766
Validation Accuracy: 95.00%
Epoch 4, Loss: 124.5083
Validation Accuracy: 95.32%
Epoch 5, Loss: 96.6361
Validation Accuracy: 95.29%
Epoch 6, Loss: 78.5485
Validation Accuracy: 96.05%
Epoch 7, Loss: 65.5200
Validation Accuracy: 96.87%
Epoch 8, Loss: 57.1584
Validation Accuracy: 96.22%
Epoch 9, Loss: 48.1913
Validation Accuracy: 97.32%
Epoch 10, Loss: 41.8791
Validation Accuracy: 97.33%
Test Accuracy: 97.21%
