In [6]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [None]:
class MNISTDataset(Dataset):
    """Dataset of grayscale images returned as flattened tensors with integer labels.

    Samples are discovered in one of two ways:

    * ``tsv_file`` given: every non-blank line holds a relative image path and
      an integer label separated by a tab; the path is joined onto ``root_dir``.
    * ``tsv_file`` omitted: every sub-directory of ``root_dir`` whose name
      parses as an integer is a class, and every non-hidden entry inside it is
      taken as an image of that class.

    Args:
        root_dir: Base directory for the image paths.
        tsv_file: Optional path to the tab-separated index file.
        transform: Optional callable applied to the PIL image. When omitted,
            the image is converted with ``transforms.ToTensor()``.
    """

    def __init__(self, root_dir, tsv_file=None, transform=None):
        self.transform = transform
        self.samples = []  # list of (image path, integer label) tuples
        if tsv_file:
            with open(tsv_file, 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # Tolerate blank lines, e.g. an extra newline at the end of the file.
                        continue
                    path, label = line.split('\t')
                    self.samples.append((os.path.join(root_dir, path), int(label)))
        else:
            # Sort both levels: os.listdir order is arbitrary, and index-based
            # splits of this dataset are only reproducible with a stable order.
            for label in sorted(os.listdir(root_dir)):
                label_dir = os.path.join(root_dir, label)
                if not os.path.isdir(label_dir):
                    continue
                try:
                    class_id = int(label)
                except ValueError:
                    # Not a class folder (e.g. ".ipynb_checkpoints"); ignore it.
                    continue
                for img_name in sorted(os.listdir(label_dir)):
                    if img_name.startswith('.'):
                        # Hidden files such as ".DS_Store" are not images.
                        continue
                    self.samples.append((os.path.join(label_dir, img_name), class_id))

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(flattened image tensor, label)``."""
        img_path, label = self.samples[idx]
        # The context manager releases the file handle right away; convert()
        # returns a separate, fully loaded image that outlives it.
        with Image.open(img_path) as img:
            image = img.convert('L')
        if self.transform:
            image = self.transform(image)
        else:
            image = transforms.ToTensor()(image)
        # reshape flattens like view(-1) but also accepts non-contiguous
        # tensors that a custom transform might return.
        image = image.reshape(-1)
        return image, label

In [7]:
class MLP(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width of the hidden layer.
        output_size: Number of values produced by the output layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in the same order and under the same attribute
        # names, so parameter initialisation and state_dict keys are unchanged.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the raw output-layer scores for ``x`` (no final activation)."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [8]:
def train_model(model, train_loader, criterion, optimizer, device):
    """Run one training pass over ``train_loader`` and return the mean loss.

    Args:
        model: Module to optimise; it is put into training mode.
        train_loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The per-batch value is weighted by the batch size, which presumes
            the criterion returns a batch mean.
        optimizer: Optimizer stepping the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Loss averaged over the samples actually processed in this pass.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Divide by the number of samples iterated rather than len(dataset): the
    # two differ with drop_last=True or a subsetting sampler, and plain
    # iterables of batches have no .dataset attribute at all.
    return running_loss / samples_seen

In [9]:
def evaluate_model(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` labels correctly.

    The model is switched to eval mode and gradients are disabled. The
    predicted class of a sample is the index of its largest output along dim 1.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_images)
            predicted = torch.max(scores, 1).indices
            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()
    return n_correct / n_seen

In [None]:
def hyperparameter_search(train_dataset, val_dataset, device,
                          hidden_sizes=(32, 64, 128, 256),
                          learning_rates=(0.0001, 0.001, 0.01, 0.1),
                          num_epochs=5, batch_size=64,
                          input_size=28*28, num_classes=10):
    """Grid-search MLP hidden size and Adam learning rate by validation accuracy.

    For every (hidden size, learning rate) pair a fresh ``MLP`` is trained for
    ``num_epochs`` passes over ``train_dataset`` and scored on ``val_dataset``.
    One line per configuration and a final summary line are printed.

    Args:
        train_dataset: Dataset used for training.
        val_dataset: Dataset used for model selection.
        device: Device the models and batches are placed on.
        hidden_sizes: Hidden-layer widths to try.
        learning_rates: Adam learning rates to try.
        num_epochs: Training passes per configuration.
        batch_size: Batch size of both data loaders.
        input_size: Length of each flattened input vector.
        num_classes: Number of output classes.

    Returns:
        ``(best_model, best_params)`` where ``best_params`` is the
        ``(hidden_size, learning_rate)`` tuple with the highest validation
        accuracy (first one wins on ties). Both are ``None`` only when the
        grid is empty.
    """
    # Neither the loaders nor the loss depend on the hyperparameters, so they
    # are built once instead of once per configuration.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    criterion = nn.CrossEntropyLoss()

    best_val_acc = 0.0
    best_params = None
    best_model = None

    for hs in hidden_sizes:
        for lr in learning_rates:
            model = MLP(input_size, hs, num_classes).to(device)
            optimizer = optim.Adam(model.parameters(), lr=lr)

            for _ in range(num_epochs):
                train_model(model, train_loader, criterion, optimizer, device)

            val_acc = evaluate_model(model, val_loader, device)
            print(f"Hidden Size: {hs}, LR: {lr}, Val Accuracy: {val_acc:.4f}")
            # Accept the first evaluated configuration unconditionally so a
            # model is still returned when every configuration scores 0.0.
            if best_model is None or val_acc > best_val_acc:
                best_val_acc = val_acc
                best_params = (hs, lr)
                best_model = model

    print("Best Hyperparameters:", best_params, "with validation accuracy:", best_val_acc)
    return best_model, best_params

In [11]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Preprocessing pipeline: image -> tensor -> flat 1-D vector.
# torch.flatten replaces the previous lambda so the pipeline consists only of
# importable callables (a lambda cannot be pickled).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(torch.flatten),
])

In [16]:
# Single source of truth for the dataset location; every other path is derived
# from it so the literals cannot drift apart.
data_root = '/Users/user/Downloads/MNIST-full/'
train_data_path = os.path.join(data_root, 'train')
train_tsv = os.path.join(data_root, 'gt-train.tsv')
# Paths inside the TSV are joined onto root_dir, so the dataset root is passed
# here rather than train_data_path.
full_train_dataset = MNISTDataset(root_dir=data_root, tsv_file=train_tsv, transform=transform)

# Split into training and validation sets (90% / 10%, fixed seed for a repeatable split)
indices = list(range(len(full_train_dataset)))
train_indices, val_indices = train_test_split(indices, test_size=0.1, random_state=42)
train_subset = torch.utils.data.Subset(full_train_dataset, train_indices)
val_subset = torch.utils.data.Subset(full_train_dataset, val_indices)


In [17]:
# Run the hyperparameter search on the train/validation subsets; returns the
# selected model together with its (hidden_size, learning_rate) pair.
best_model, best_params = hyperparameter_search(train_subset, val_subset, device)

Hidden Size: 32, LR: 0.0001, Val Accuracy: 0.9063
Hidden Size: 32, LR: 0.001, Val Accuracy: 0.9527
Hidden Size: 32, LR: 0.01, Val Accuracy: 0.9542
Hidden Size: 32, LR: 0.1, Val Accuracy: 0.7713
Hidden Size: 64, LR: 0.0001, Val Accuracy: 0.9155
Hidden Size: 64, LR: 0.001, Val Accuracy: 0.9617
Hidden Size: 64, LR: 0.01, Val Accuracy: 0.9545
Hidden Size: 64, LR: 0.1, Val Accuracy: 0.4602
Hidden Size: 128, LR: 0.0001, Val Accuracy: 0.9210
Hidden Size: 128, LR: 0.001, Val Accuracy: 0.9728
Hidden Size: 128, LR: 0.01, Val Accuracy: 0.9640
Hidden Size: 128, LR: 0.1, Val Accuracy: 0.5757
Hidden Size: 256, LR: 0.0001, Val Accuracy: 0.9365
Hidden Size: 256, LR: 0.001, Val Accuracy: 0.9700
Hidden Size: 256, LR: 0.01, Val Accuracy: 0.9627
Hidden Size: 256, LR: 0.1, Val Accuracy: 0.7300
Best Hyperparameters: (128, 0.001) with validation accuracy: 0.9728333333333333


In [18]:
# Score the selected model on the test set listed in gt-test.tsv.
test_dataset = MNISTDataset(
    root_dir='/Users/user/Downloads/MNIST-full/',
    tsv_file='/Users/user/Downloads/MNIST-full/gt-test.tsv',
    transform=transform,
)
# Order is irrelevant for accuracy, so the loader is left unshuffled.
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)
test_acc = evaluate_model(best_model, test_loader, device)
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.9738
