In [2]:
# Install the packages listed in requirements.txt. %pip (rather than !pip)
# targets the environment of the running kernel; the kernel may need a
# restart before newly installed versions are picked up.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# --------------------------
# 1. Define a simple CNN with nn.Sequential
# --------------------------
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 28x28 images.

    Two ``Conv2d -> ReLU -> MaxPool2d`` stages feed a two-layer fully
    connected head.  Both convolutions use a 3x3 kernel with ``padding=1``,
    so only the two 2x2 poolings shrink the spatial size (28 -> 14 -> 7).
    The first ``Linear`` layer is sized for that 64x7x7 map, so inputs must
    be 28x28.

    Args:
        in_channels: Number of channels in the input images (default 1).
        num_classes: Number of logits produced per sample (default 10).
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()

        # Convolutional feature extractor
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),  # Cx28x28 → 32x28x28
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),                                    # 32x14x14

            nn.Conv2d(32, 64, kernel_size=3, padding=1),           # 32x14x14 → 64x14x14
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),                                    # 64x7x7
        )

        # Fully connected classifier
        self.classifier = nn.Sequential(
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, num_classes),   # one logit per class
        )

    def forward(self, x):
        """Compute raw (un-normalised) class logits.

        Args:
            x: Image batch shaped ``(batch, in_channels, 28, 28)``.

        Returns:
            Tensor shaped ``(batch, num_classes)``.
        """
        x = self.features(x)             # Conv + ReLU + Pool
        # Keep the batch dimension and flatten the rest.  flatten() copies
        # when the feature map is not contiguous (e.g. channels_last),
        # where view() would raise.
        x = x.flatten(1)
        x = self.classifier(x)           # FC layers
        return x

# --------------------------
# 2. Load dataset
# --------------------------
# Preprocessing shared by both splits: convert each image to a tensor, then
# normalise its single channel with mean 0.5 and std 0.5.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5,), (0.5,))
transform = transforms.Compose([to_tensor, normalize])

# Both MNIST splits use the same root directory, are downloaded on demand,
# and go through the same preprocessing; only the `train` flag differs.
mnist_kwargs = dict(root="./data", download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Training batches are small and reshuffled each epoch; evaluation uses
# large batches in fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1000)

# --------------------------
# 3. Training setup
# --------------------------
# Seed PyTorch's global RNG before anything draws from it, so that the
# weight initialisation below and the shuffled batch order of the training
# loader repeat on every Restart & Run All.  (Some GPU kernels can still be
# nondeterministic.)
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)

# The model returns raw logits, which is what CrossEntropyLoss expects
# alongside integer class labels.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# --------------------------
# 4. Train loop
# --------------------------
num_epochs = 2  # just 2 epochs for demo

# Epochs are numbered from 1 so the progress log reads naturally.
for epoch in range(1, num_epochs + 1):
    model.train()
    for batch_idx, (images, labels) in enumerate(train_loader):
        # Move the batch to the device the model lives on.
        data = images.to(device)
        target = labels.to(device)

        # Forward pass and loss.
        output = model(data)
        loss = criterion(output, target)

        # Clear gradients left over from the previous step, then
        # backpropagate and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report only every 100th batch, showing that batch's own loss.
        if batch_idx % 100 != 0:
            continue
        print(f"Epoch {epoch} [{batch_idx*len(data)}/{len(train_loader.dataset)}]  Loss: {loss.item():.4f}")

# --------------------------
# 5. Test loop
# --------------------------
# Evaluate on the held-out split: put the model in eval mode and turn off
# gradient tracking, since no backward pass is needed here.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        outputs = model(data)
        # The predicted class is the index of the largest logit in each row.
        # argmax works on the tensor directly; `.data` is avoided because it
        # bypasses autograd's checks and adds nothing under no_grad().
        predicted = outputs.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


100.0%
100.0%
100.0%
100.0%


Epoch 1 [0/60000]  Loss: 2.3023
Epoch 1 [6400/60000]  Loss: 0.1370
Epoch 1 [12800/60000]  Loss: 0.2198
Epoch 1 [19200/60000]  Loss: 0.0504
Epoch 1 [25600/60000]  Loss: 0.0813
Epoch 1 [32000/60000]  Loss: 0.0259
Epoch 1 [38400/60000]  Loss: 0.0705
Epoch 1 [44800/60000]  Loss: 0.2178
Epoch 1 [51200/60000]  Loss: 0.0128
Epoch 1 [57600/60000]  Loss: 0.0436
Epoch 2 [0/60000]  Loss: 0.0215
Epoch 2 [6400/60000]  Loss: 0.0672
Epoch 2 [12800/60000]  Loss: 0.0468
Epoch 2 [19200/60000]  Loss: 0.0577
Epoch 2 [25600/60000]  Loss: 0.0306
Epoch 2 [32000/60000]  Loss: 0.0057
Epoch 2 [38400/60000]  Loss: 0.0079
Epoch 2 [44800/60000]  Loss: 0.1191
Epoch 2 [51200/60000]  Loss: 0.0065
Epoch 2 [57600/60000]  Loss: 0.0158
Test Accuracy: 98.83%
