In [None]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from a3_mnist import Lenet
import time

In [124]:
# Select the compute device: use CUDA when PyTorch reports it is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [125]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# the single channel with mean 0.1307 and std 0.3081
# (presumably the MNIST training-set statistics — verify if the data changes).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST splits stored under ../data; only the training-split constructor
# requests a download.
dataset1 = datasets.MNIST(root="../data", train=True, download=True, transform=transform)
dataset2 = datasets.MNIST(root="../data", train=False, transform=transform)

In [129]:
# Train on only the first 1,000 training images: a deliberately small
# training set makes it easier for the model to overfit.
subset_indices = list(range(1000))
subset_data = torch.utils.data.Subset(dataset1, subset_indices)

# Training batches are reshuffled every epoch; the test set is iterated in a
# fixed order.
train_loader = torch.utils.data.DataLoader(subset_data, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset2, batch_size=64, shuffle=False)


In [133]:
# Build the model and move it to the selected device *before* creating the
# optimizer, so the optimizer is constructed over the parameters that are
# actually used for the forward/backward passes.
# (Assumes Lenet is a torch.nn.Module — it is used as one throughout this cell.)
model = Lenet().to(device)

# Adam without weight decay; the learning rate is annealed with a cosine
# schedule from 0.01 towards eta_min over T_max scheduler steps. The scheduler
# is stepped once per epoch below, so T_max matches the epoch budget.
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=500, eta_min=0.0001, last_epoch=-1)

# Wall-clock start of training (recorded here; not read within this cell).
start_time = time.time()

for epoch in range(1, 501):
    # ---- Training pass over the subset ----
    model.train()
    train_loss = 0
    correct_train = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        # Inputs and labels must live on the same device as the model;
        # without this the cell fails with a device mismatch when CUDA is used.
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        # nll_loss expects log-probabilities
        # (assumes Lenet ends in log_softmax — TODO confirm in a3_mnist).
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # Running sum of per-batch mean losses (accumulated, not reported below).
        train_loss += loss.item()
        pred = output.argmax(dim=1, keepdim=True)
        correct_train += pred.eq(target.view_as(pred)).sum().item()

    # One learning-rate update per epoch.
    scheduler.step()

    # ---- Evaluate on test set ----
    model.eval()
    test_loss = 0
    correct_test = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Summed (not averaged) loss over the test set; accumulated but
            # not reported below.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct_test += pred.eq(target.view_as(pred)).sum().item()

    # NOTE(review): train accuracy is accumulated during the epoch, in train
    # mode and while the weights are still being updated, whereas test accuracy
    # uses the end-of-epoch weights in eval mode — the two are not measured
    # under identical conditions.
    train_acc = 100. * correct_train / len(subset_data)
    test_acc = 100. * correct_test / len(dataset2)

    if epoch % 5 == 0:
        print(f'Epoch {epoch}: Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%')
        print(f'  Gap (overfitting indicator): {train_acc - test_acc:.2f}%')

    # Early stopping: stop as soon as training accuracy exceeds test accuracy
    # by more than 4 percentage points.
    if train_acc - test_acc > 4:
        print("✅ Successfully created overfitted model (gap > 4%)")
        break

# Summary of the last epoch that ran: train_acc / test_acc still hold the
# values computed in the final iteration of the training loop.
print(
    "\nFinal Results:",
    f"Training Accuracy: {train_acc:.2f}%",
    f"Test Accuracy: {test_acc:.2f}%",
    f"Overfitting Gap: {train_acc - test_acc:.2f}%",
    sep="\n",
)


Epoch 5: Train Acc: 81.40%, Test Acc: 90.45%
  Gap (overfitting indicator): -9.05%
Epoch 10: Train Acc: 89.30%, Test Acc: 93.82%
  Gap (overfitting indicator): -4.52%
Epoch 15: Train Acc: 90.80%, Test Acc: 93.61%
  Gap (overfitting indicator): -2.81%
Epoch 20: Train Acc: 91.50%, Test Acc: 95.03%
  Gap (overfitting indicator): -3.53%
Epoch 25: Train Acc: 91.70%, Test Acc: 94.13%
  Gap (overfitting indicator): -2.43%
Epoch 30: Train Acc: 93.00%, Test Acc: 94.13%
  Gap (overfitting indicator): -1.13%
Epoch 35: Train Acc: 92.90%, Test Acc: 94.15%
  Gap (overfitting indicator): -1.25%
Epoch 40: Train Acc: 94.40%, Test Acc: 94.46%
  Gap (overfitting indicator): -0.06%
Epoch 45: Train Acc: 94.30%, Test Acc: 94.46%
  Gap (overfitting indicator): -0.16%
Epoch 50: Train Acc: 94.80%, Test Acc: 94.68%
  Gap (overfitting indicator): 0.12%
Epoch 55: Train Acc: 95.60%, Test Acc: 95.20%
  Gap (overfitting indicator): 0.40%
Epoch 60: Train Acc: 95.10%, Test Acc: 94.16%
  Gap (overfitting indicator): 0.94%

In [134]:
# Write the model's state_dict (not the module object itself) to disk.
torch.save(obj=model.state_dict(), f="mnist_cnn_overfitted.pt")