In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [2]:
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
train_df = pd.read_csv('../data/train.csv')
test_df = pd.read_csv('../data/test.csv')
dig_df = pd.read_csv('../data/Dig-MNIST.csv')

In [4]:
pixel_columns = [col for col in train_df.columns if col.startswith("pixel")]
X = train_df[pixel_columns].values.reshape(-1, 1, 28, 28) / 255.0
y = train_df["label"].values
X_test = test_df[pixel_columns].values.reshape(-1, 1, 28, 28) / 255.0
X_dig = dig_df[pixel_columns].values.reshape(-1, 1, 28, 28) / 255.0
y_dig = dig_df["label"].values

In [5]:
class MNISTDataset(Dataset):
    def __init__(self, images, labels=None):
        self.images = torch.tensor(images, dtype=torch.float32)
        self.labels = torch.tensor(
            labels, dtype=torch.long) if labels is not None else None

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return (self.images[idx], self.labels[idx]) if self.labels is not None else self.images[idx]

In [6]:
class CNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(0.25),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(0.25),
            nn.Flatten(),
            nn.Linear(128 * 7 * 7, 256), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        return self.model(x)


def train_one_epoch(model, loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)


def evaluate(model, loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            outputs = model(x)
            preds = outputs.argmax(1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(y.numpy())
    return accuracy_score(all_labels, all_preds)




In [7]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
val_scores, dig_scores = [], []
test_preds_all = np.zeros((X_test.shape[0], 10))

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    model = CNNModel().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    criterion = nn.CrossEntropyLoss()

    train_loader = DataLoader(MNISTDataset(
        X[train_idx], y[train_idx]), batch_size=128, shuffle=True)
    val_loader = DataLoader(MNISTDataset(
        X[val_idx], y[val_idx]), batch_size=256, shuffle=False)
    dig_loader = DataLoader(MNISTDataset(X_dig, y_dig),
                            batch_size=256, shuffle=False)
    test_loader = DataLoader(MNISTDataset(
        X_test), batch_size=256, shuffle=False)

    best_val_acc = 0
    for epoch in range(1, 16):
        loss = train_one_epoch(model, train_loader, criterion, optimizer)
        val_acc = evaluate(model, val_loader)
        scheduler.step()
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state = model.state_dict()

    model.load_state_dict(best_model_state)
    val_scores.append(best_val_acc)
    dig_acc = evaluate(model, dig_loader)
    dig_scores.append(dig_acc)

    test_probs_fold = []
    with torch.no_grad():
        for x in test_loader:
            x = x.to(device)
            logits = model(x)
            probs = torch.softmax(logits, dim=1).cpu().numpy()
            test_probs_fold.append(probs)
    test_preds_all += np.concatenate(test_probs_fold, axis=0) / skf.n_splits

In [11]:
final_preds = np.argmax(test_preds_all, axis=1)
submission = pd.DataFrame(
    {"id": np.arange(1, len(final_preds) + 1), "label": final_preds})
submission.to_csv(
    "../submissions/mldl_competition3_sharifbek_submission2.csv", index=False)

print(
    f"Avg Val Acc: {np.mean(val_scores):.4f}, Avg Dig Acc: {np.mean(dig_scores):.4f}")

Avg Val Acc: 0.9968, Avg Dig Acc: 0.8005


In [12]:
!jupyter nbconvert --to html "mldl_competition3_sharifbek_submission2.ipynb"

[NbConvertApp] Converting notebook mldl_competition3_sharifbek_submission2.ipynb to html
[NbConvertApp] Writing 303882 bytes to mldl_competition3_sharifbek_submission2.html
