In [3]:
# Cell 1: imports and seed
import os  # Standard library: used to create the output directory before saving model weights.

import torch  # Brings in the core PyTorch library. You use it for tensors, autograd, device management, and basic ops
import torch.nn as nn # torch.nn contains building blocks for neural networks (layers, containers, loss functions). Aliasing to nn is a widely used convention.
import torch.optim as optim # torch.optim contains optimizers (SGD, Adam, etc.). Aliasing to optim keeps code concise.
import torchvision  # torchvision supplies common vision datasets (MNIST, CIFAR), pretrained models, and utilities for image transforms.
import torchvision.transforms as transforms # transforms provides functions to convert/augment images (e.g., ToTensor, cropping). We alias it for clarity.
import matplotlib.pyplot as plt # For plotting training results or example images.

# Seed PyTorch's global random number generator so repeated runs start from the same state.
SEED = 0
# The trailing ';' stops the notebook from echoing the returned Generator object as cell output.
torch.manual_seed(SEED);

<torch._C.Generator at 0x1f724716850>

In [8]:
# Cell 2: dataset and loaders
# The only preprocessing step is converting each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Both splits share the same root directory, transform and download flag; only `train` differs.
mnist_kwargs = dict(root='../data', transform=transform, download=True)
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

# Training batches are shuffled; test batches are larger and kept in dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

print(f"Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")

100%|█████████████████████████████████████████████████████████████████████████████| 9.91M/9.91M [00:00<00:00, 13.8MB/s]
100%|█████████████████████████████████████████████████████████████████████████████| 28.9k/28.9k [00:00<00:00, 1.82MB/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1.65M/1.65M [00:00<00:00, 9.77MB/s]
100%|█████████████████████████████████████████████████████████████████████████████████████| 4.54k/4.54k [00:00<?, ?B/s]

Train size: 60000, Test size: 10000





In [9]:
# Cell 3: model definition
class MLP(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Number of values in one flattened input sample.
            Defaults to 28*28.
        hidden_features: Width of the hidden layer. Defaults to 128.
        num_classes: Number of output logits per sample. Defaults to 10.

    Calling ``MLP()`` with no arguments builds the same layers, with the same
    parameter names (``fc1``, ``fc2``), as before the sizes were configurable.
    """

    def __init__(self, in_features=28*28, hidden_features=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Flatten ``x`` to rows of ``in_features`` values and return raw logits.

        Args:
            x: Tensor whose total element count is a multiple of ``in_features``.

        Returns:
            Tensor of shape ``(N, num_classes)`` where ``N`` is inferred from ``x``.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed or permuted batches, copying only when it has to.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

# Step size used by plain SGD below.
LEARNING_RATE = 0.1

# Build the network, then the loss and the optimizer that updates its parameters.
model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)


In [10]:
# Cell 4: training loop (one pass over train_loader)
model.train()
for batch_idx, (images, labels) in enumerate(train_loader):
    # Clear gradients accumulated by the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass and loss for this mini-batch.
    logits = model(images)
    loss = criterion(logits, labels)

    # Backward pass followed by a parameter update.
    loss.backward()
    optimizer.step()

    # Report the current mini-batch loss every 100 batches.
    if batch_idx % 100 == 0:
        print(f"Batch {batch_idx}, Loss: {loss.item():.4f}")

Batch 0, Loss: 2.3023
Batch 100, Loss: 0.6428
Batch 200, Loss: 0.3422
Batch 300, Loss: 0.4669
Batch 400, Loss: 0.5088
Batch 500, Loss: 0.2350
Batch 600, Loss: 0.2396
Batch 700, Loss: 0.2746
Batch 800, Loss: 0.1008
Batch 900, Loss: 0.2032


In [11]:
# Cell 5: evaluation
model.eval()
correct = 0
total = 0
# Gradients are not needed for scoring, so disable autograd tracking.
with torch.no_grad():
    for images, labels in test_loader:
        # The predicted class is the index of the largest logit in each row.
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 92.50%


In [12]:
# Cell 6: save model weights
MODEL_PATH = '../models/mlp_mnist.pth'

# torch.save does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# Only the parameters (state_dict) are written, not the full module object.
torch.save(model.state_dict(), MODEL_PATH)
print(f"Saved model to {MODEL_PATH}")

Saved model to ../models/mlp_mnist.pth
