1. Imports and Initial Setup

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import pandas as pd 

2. Device Setup


In [None]:
# Prefer a CUDA GPU when PyTorch can see one; otherwise fall back to the CPU so
# the notebook still runs unchanged on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


3. Data Augmentation

In [None]:
def get_transforms():
    """Build the torchvision preprocessing pipelines for training and testing.

    Returns:
        tuple: ``(augmentation_train, augmentation_test)``. The training
        pipeline chains a random rotation, a random affine transform
        (rotation, translation and scaling) and a tensor conversion. The test
        pipeline only converts the image to a tensor.
    """
    train_steps = [
        transforms.RandomRotation(degrees=10),
        transforms.RandomAffine(degrees=20, translate=(0.1, 0.1), scale=(0.85, 1.15)),
        transforms.ToTensor(),
    ]
    test_steps = [transforms.ToTensor()]
    return transforms.Compose(train_steps), transforms.Compose(test_steps)


4. Dataset Class

In [None]:
class MNISTDataset(Dataset):
    """MNIST wrapper that yields flattened images and one-hot labels.

    Each item is a dict with keys ``"index"`` (the index that was requested),
    ``"image"`` (the image flattened to a 1-D tensor) and ``"label"`` (a float
    one-hot tensor over the 10 digit classes).

    Args:
        mode: ``"train"`` selects the training split; any other value selects
            the test split.
        transform: Optional callable applied to every image. If it is omitted,
            or does not return a tensor, the image is converted with
            ``torchvision.transforms.ToTensor()`` so that flattening works.
    """

    def __init__(self, mode="train", transform=None):
        print(f"\nLoading MNIST {mode} dataset...")
        self.mode = mode
        self.transform = transform
        self.data = torchvision.datasets.MNIST(
            root="./data", train=(mode == "train"), download=True
        )
        print(f"Total length: {len(self.data)}")

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the sample at ``index`` as an ``index``/``image``/``label`` dict."""
        image, label = self.data[index]
        if self.transform is not None:
            image = self.transform(image)
        # Without a tensor-producing transform the image is still a PIL image,
        # which has no tensor methods; convert it instead of crashing below.
        if not torch.is_tensor(image):
            image = torchvision.transforms.ToTensor()(image)
        # reshape (unlike view) also accepts non-contiguous tensors.
        image = image.reshape(-1)
        label = F.one_hot(torch.tensor(label), num_classes=10).float()
        return {"index": index, "image": image, "label": label}


5. DataLoader Creation

In [None]:
def get_dataloaders(train_transform, test_transform, batch_size, num_workers):
    """Create the MNIST train and test ``DataLoader`` objects.

    Args:
        train_transform: Transform handed to the training dataset.
        test_transform: Transform handed to the test dataset.
        batch_size: Batch size used by both loaders.
        num_workers: ``num_workers`` value used by both loaders.

    Returns:
        tuple: ``(train_loader, test_loader)``; only the training loader
        shuffles.
    """
    # Build both datasets first (train, then test), then wrap them in loaders.
    datasets = {
        split: MNISTDataset(mode=split, transform=split_transform)
        for split, split_transform in (("train", train_transform), ("test", test_transform))
    }
    shared_options = dict(batch_size=batch_size, num_workers=num_workers)
    return (
        DataLoader(datasets["train"], shuffle=True, **shared_options),
        DataLoader(datasets["test"], shuffle=False, **shared_options),
    )

# Loader configuration shared by the train and test loaders.
batch_size = 100
num_workers = 0

train_transform, test_transform = get_transforms()
train_loader, test_loader = get_dataloaders(
    train_transform=train_transform,
    test_transform=test_transform,
    batch_size=batch_size,
    num_workers=num_workers,
)

6. Neural Network Definition

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Linear -> BatchNorm1d -> ReLU stacks plus a linear head.

    Args:
        sizes: Sequence of ``(in_dim, out_dim)`` pairs. Every pair except the
            last becomes one hidden stack in ``features``; the last pair sizes
            the ``classifier`` output layer.
        loss_fn: Callable invoked as ``loss_fn(logits, y)`` when targets are
            passed to ``forward``.
    """

    def __init__(self, sizes, loss_fn):
        super().__init__()
        hidden_stack = []
        for fan_in, fan_out in sizes[:-1]:
            hidden_stack += [nn.Linear(fan_in, fan_out), nn.BatchNorm1d(fan_out), nn.ReLU()]
        self.features = nn.Sequential(*hidden_stack)
        head_dims = sizes[-1]
        self.classifier = nn.Linear(head_dims[0], head_dims[1])
        self.loss_fn = loss_fn

    def forward(self, x, y=None):
        """Return the logits, or ``(loss, logits)`` when targets ``y`` are given."""
        logits = self.classifier(self.features(x))
        if y is None:
            return logits
        return self.loss_fn(logits, y), logits

7. Utility Functions

In [None]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Build the classifier (three hidden stacks and a 10-way output layer) and move
# it to the selected device.
loss_function = nn.CrossEntropyLoss()
network = NeuralNetwork(
    sizes=[[784, 1024], [1024, 1024], [1024, 1024], [1024, 10]],
    loss_fn=loss_function,
).to(device)

8. Training and Evaluation Functions

In [None]:
def train_one_epoch(net, dataloader, optimizer, scheduler, device):
    """Train ``net`` for one pass over ``dataloader`` and report epoch metrics.

    Args:
        net: Model called as ``net(images, labels)`` and returning
            ``(loss, outputs)``.
        dataloader: Yields dicts holding ``"image"`` and ``"label"`` tensors;
            labels are per-class scores (e.g. one-hot) compared via argmax.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once after the epoch.
        device: Device the batch tensors are moved to.

    Returns:
        tuple: ``(average loss per sample, accuracy in percent)`` over the
        samples processed in this epoch.
    """
    # Switch the model that was passed in (not a notebook-level global) to
    # training mode.
    net.train()
    # A mean-reduced loss is already averaged over its batch, so it is weighted
    # by the batch size before summing; any other reduction is summed as-is.
    mean_reduced = getattr(getattr(net, "loss_fn", None), "reduction", "mean") == "mean"
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for batch in tqdm(dataloader, desc="Training", unit="batch"):
        images, labels = batch["image"].to(device), batch["label"].to(device)
        optimizer.zero_grad()
        loss, outputs = net(images, labels)
        loss.backward()
        optimizer.step()

        n_batch = labels.size(0)
        loss_sum += loss.item() * (n_batch if mean_reduced else 1)
        n_correct += (torch.argmax(outputs, dim=1) == torch.argmax(labels, dim=1)).sum().item()
        n_seen += n_batch

    scheduler.step()
    # Normalise by the samples actually processed; guard against an empty loader.
    denominator = max(n_seen, 1)
    train_accuracy = 100. * n_correct / denominator
    return loss_sum / denominator, train_accuracy

def evaluate(net, dataloader, loss_fn, device):
    """Evaluate ``net`` on ``dataloader`` without tracking gradients.

    Args:
        net: Model called as ``net(images)`` and returning class scores.
        dataloader: Yields dicts holding ``"image"`` and ``"label"`` tensors;
            labels are per-class scores (e.g. one-hot) compared via argmax.
        loss_fn: Callable invoked as ``loss_fn(outputs, labels)``.
        device: Device the batch tensors are moved to.

    Returns:
        tuple: ``(average loss per sample, accuracy in percent)`` over the
        samples processed.
    """
    # Switch the model that was passed in (not a notebook-level global) to
    # evaluation mode.
    net.eval()
    # A mean-reduced loss is already averaged over its batch, so it is weighted
    # by the batch size before summing; any other reduction is summed as-is.
    mean_reduced = getattr(loss_fn, "reduction", "mean") == "mean"
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Testing", unit="batch"):
            images, labels = batch["image"].to(device), batch["label"].to(device)
            outputs = net(images)
            n_batch = labels.size(0)
            loss_sum += loss_fn(outputs, labels).item() * (n_batch if mean_reduced else 1)
            n_correct += (torch.argmax(outputs, dim=1) == torch.argmax(labels, dim=1)).sum().item()
            n_seen += n_batch
    # Normalise by the samples actually processed; guard against an empty loader.
    denominator = max(n_seen, 1)
    test_accuracy = 100. * n_correct / denominator
    return loss_sum / denominator, test_accuracy


9. Training Loop

In [None]:
# Optimiser hyper-parameters.
learning_rate = 0.1
weight_decay = 1e-6
momentum = 0.9

# Training length and the MultiStepLR schedule settings.
epochs = 100
milestones = [25, 50, 75]
gamma = 0.1

optimizer = optim.SGD(
    network.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

# Best test accuracy seen so far and the zero-based epoch that produced it.
best_accuracy = best_epoch = -1
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    train_loss, train_acc = train_one_epoch(network, train_loader, optimizer, scheduler, device)
    test_loss, test_acc = evaluate(network, test_loader, loss_function, device)

    print(f"Train Loss: {train_loss:.6f}, Train Acc: {train_acc:.2f}% | Test Loss: {test_loss:.6f}, Test Acc: {test_acc:.2f}%")

    # Checkpoint only on a strict improvement, so ties keep the earlier epoch.
    if not test_acc > best_accuracy:
        continue
    best_accuracy, best_epoch = test_acc, epoch
    torch.save(network.state_dict(), "best_model.pt")

print(f"\nBest Test Accuracy: {best_accuracy:.2f}% at epoch {best_epoch + 1}")

10. Submission Generation

In [None]:
def generate_submission(network, dataloader, device, output_file="submission.csv",
                        model_path="best_model.pt"):
    """Load a saved checkpoint, predict every batch and write a submission CSV.

    Args:
        network: Model whose weights are replaced by the checkpoint contents.
        dataloader: Yields dicts holding ``"image"`` and ``"index"`` tensors.
        device: Device used for inference and as the checkpoint load target.
        output_file: Path of the CSV to write (columns ``ID`` and ``target``).
        model_path: Path of the state-dict checkpoint to load.
    """
    print("\nLoading the best model for submission...")
    # map_location keeps loading working when the checkpoint was written on a
    # different device than the one used now (e.g. saved on GPU, loaded on CPU).
    # NOTE: torch.load unpickles data; only load checkpoints from a trusted source.
    state_dict = torch.load(model_path, map_location=device)
    network.load_state_dict(state_dict)
    network.eval()

    sample_ids, predictions = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Generating Submission"):
            images = batch["image"].to(device)
            outputs = network(images)
            sample_ids.extend(batch["index"].cpu().tolist())
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())

    # Sort by ID so the file order does not depend on the loader's batch order.
    submission_df = pd.DataFrame({"ID": sample_ids, "target": predictions}).sort_values(by="ID")
    submission_df.to_csv(output_file, index=False)
    print(f"Submission file '{output_file}' generated.")

# Write the submission for the test split using the default output path.
generate_submission(network=network, dataloader=test_loader, device=device)
