# Setup

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file found,
# so the available dataset files are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the file name to get a usable path
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Imports
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

# Data

In [None]:
class DigitRecognizerDataset(Dataset):
    """Torch dataset wrapping a digit-recognizer DataFrame.

    Args:
        dataframe: For labelled data, column 0 holds the label and the
            remaining columns hold the pixel values. For test data
            (``is_test=True``), every column is a pixel value.
        is_test: When True, no labels are read and items are returned as
            bare image tensors.

    Attributes set on the instance:
        is_test: The flag passed in.
        labels: ``torch.long`` tensor of labels, or ``None`` for test data.
        features: ``torch.float32`` tensor viewed as (N, 1, 28, 28).
    """

    def __init__(self, dataframe, is_test=False):
        self.is_test = is_test

        if self.is_test:
            # Unlabelled data: the whole frame is pixel values
            self.labels = None
            pixels = dataframe.values
        else:
            # Labelled data: split the leading label column from the pixels
            self.labels = torch.tensor(dataframe.iloc[:, 0].values, dtype=torch.long)
            pixels = dataframe.iloc[:, 1:].values

        # Each flat row becomes a single-channel 28x28 image
        self.features = torch.tensor(pixels, dtype=torch.float32).view(-1, 1, 28, 28)

    def __len__(self):
        """Return the number of images held by the dataset."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return ``image`` for test data, otherwise ``(image, label)``."""
        image = self.features[idx]
        return image if self.is_test else (image, self.labels[idx])

In [None]:
# CSV files are resolved relative to the current working directory.
train_set_filepath = os.path.join(os.getcwd(), 'train.csv')
test_set_filepath = os.path.join(os.getcwd(), 'test.csv')

# Load datasets. The test file is optional: when it is missing, every
# test-related object below is set to None instead.
base_train_df = pd.read_csv(train_set_filepath)
if os.path.exists(test_set_filepath):
    base_test_df = pd.read_csv(test_set_filepath)
else:
    base_test_df = None

# Only the train file has labels, so split it into the actual train and validation sets
train_df, val_df = train_test_split(base_train_df, test_size=0.2, random_state=42)

# Convert to torch datasets
train_set = DigitRecognizerDataset(train_df, is_test=False)
val_set = DigitRecognizerDataset(val_df, is_test=False)
# Compare against None explicitly: the truth value of a DataFrame is
# ambiguous and `if base_test_df:` raises ValueError.
if base_test_df is not None:
    test_set = DigitRecognizerDataset(base_test_df, is_test=True)
else:
    test_set = None

# Create data loaders (only the training data is shuffled)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
if test_set is not None:
    test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
else:
    test_loader = None

# Model

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier producing 10 output logits.

    Two conv(5x5) + ReLU + 2x2 max-pool stages are followed by three fully
    connected layers. ``fc1`` expects 16 * 4 * 4 features, which is what the
    two conv/pool stages yield for a single-channel 28x28 input
    (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw (un-normalised) class scores for a batch of images."""
        # Both conv stages share the same ReLU + pooling treatment
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Collapse everything except the batch dimension
        x = x.flatten(start_dim=1)

        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))

        # No activation on the last layer: callers receive logits
        return self.fc3(x)

In [None]:
# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
elif torch.backends.mps.is_available():
    DEVICE = torch.device('mps')
else:
    DEVICE = torch.device('cpu')

# Model lives on the selected device
cnn = CNN().to(DEVICE)

# Cross-entropy is applied to the raw scores returned by the model
criterion = nn.CrossEntropyLoss()

# Adam optimiser; the scheduler multiplies the learning rate by 0.1 once the
# monitored value (mode='min') has stopped improving for more than 2 epochs.
optimizer = optim.Adam(cnn.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=2,
)

# Train

In [None]:
# Validation
def validate_model(model):
    """Evaluate ``model`` on ``val_loader`` and report accuracy and loss.

    Uses the module-level ``val_loader``, ``DEVICE`` and ``criterion``.
    Puts the model in evaluation mode and leaves it there; callers that
    continue training must switch back to training mode themselves.

    Args:
        model: The network to evaluate.

    Returns:
        The mean of the per-batch validation losses (a float).
    """
    model.eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()

            # Predicted class = index of the largest score per row
            predicted = torch.max(outputs, 1)[1]
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    # Averaged over the number of batches, not the number of samples
    avg_loss = loss_sum / len(val_loader)
    print(f'Accuracy: {100 * n_correct / n_seen:.2f}% | Val Loss: {avg_loss:.4f}')
    return avg_loss

In [None]:
# Train
# Number of full passes over the training data
EPOCHS = 10
for epoch in range(EPOCHS):
    # validate_model() leaves the model in eval mode, so switch back every epoch
    cnn.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        # Standard optimisation step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = cnn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean training loss once every 100 mini-batches
        running_loss += loss.item()
        if i % 100 == 99:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

    # Run validation and get the loss
    current_val_loss = validate_model(cnn)

    # Step the scheduler based on the validation loss
    scheduler.step(current_val_loss)

    # One end-of-epoch line is enough; it also reports the (possibly reduced) learning rate
    current_lr = optimizer.param_groups[0]['lr']
    print(f'End of Epoch {epoch + 1} - Learning Rate: {current_lr}')

print('Finished Training')

# Submission

In [None]:
# Submission
# Build the submission only when a test set was loaded; otherwise
# `submission_df` stays None so the later cells can skip themselves.
submission_df = None
if test_loader is not None:
    cnn.eval()

    # Predicted labels for the whole test set, in loader order (the loader is not shuffled)
    all_preds = []
    with torch.no_grad():
        for images in test_loader:
            images = images.to(DEVICE)
            logits = cnn(images)

            # Predicted class = index of the largest logit in each row
            predicted = logits.argmax(dim=1)
            all_preds.extend(predicted.cpu().tolist())

    # Image ids are 1-based and follow the prediction order
    submission_data = {
        'ImageId': list(range(1, len(all_preds) + 1)),
        'Label': all_preds,
    }

    # Save to csv
    submission_df = pd.DataFrame(submission_data)
    submission_df.to_csv('submission.csv', index=False)
    print("Submission file saved!")

## Verification

In [None]:
# Sanity-check the generated submission. Compare against None explicitly:
# the truth value of a DataFrame is ambiguous and `if submission_df:` raises ValueError.
if submission_df is not None:
    # 1. Check for missing values
    missing_values = submission_df.isnull().sum().sum()

    # 2. Check for correct label range (0-9)
    invalid_labels = submission_df[(submission_df['Label'] < 0) | (submission_df['Label'] > 9)]

    # 3. Final Verification Report
    print("--- Submission Verification ---")
    print(f"Total Rows: {len(submission_df)}")
    print(f"Missing Values: {missing_values}")
    print(f"Invalid Labels Found: {len(invalid_labels)}")
    print(f"Column Names: {list(submission_df.columns)}")

    # There must be exactly one prediction per row of the loaded test file
    # (`base_test_df` is the frame the test loader was built from).
    if len(submission_df) == len(base_test_df) and missing_values == 0 and len(invalid_labels) == 0:
        print("\n✅ Verification Passed! Your file is ready for submission.")
    else:
        print("\n❌ Verification Failed. Please check the counts above.")

In [None]:
# Visual spot check: show the first test image together with its predicted label.
# Compare against None explicitly; `if submission_df:` raises ValueError for a DataFrame.
if submission_df is not None:
    # Get one batch from the test loader (test batches contain images only)
    images = next(iter(test_loader))

    # The model lives on DEVICE, so the batch must be moved there before the
    # forward pass; gradients are not needed for inference.
    cnn.eval()
    with torch.no_grad():
        outputs = cnn(images.to(DEVICE))
    predicted = outputs.argmax(dim=1)

    # Plot the first image in the batch (brought to the CPU for matplotlib)
    plt.imshow(images[0].cpu().reshape(28, 28), cmap='gray')
    plt.title(f'Predicted Label: {predicted[0].item()}')
    plt.show()