In [63]:
# Import all the necessary libraries, frameworks and modules
import torch
import numpy as np
import pandas as pd
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset

In [64]:
# Select the compute device: CUDA when torch reports it is available, CPU otherwise.
# (No model is created here; later cells move the model and batches to `device`.)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [65]:
# Read the competition CSVs into dataframes (training file first, then test file)
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
    )
)

In [66]:
# --- Training split ---
# Column 0 is taken as the label; every remaining column is reshaped to
# (N, 1, 28, 28) and divided by 255.
y_train = train_df.iloc[:, 0].to_numpy()
X_train = train_df.iloc[:, 1:].to_numpy().reshape(-1, 1, 28, 28) / 255.0

# Tensor copies of the arrays: float32 images, int64 labels
X_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.tensor(y_train, dtype=torch.int64)

# Shuffled mini-batches over the raw (un-augmented) tensors.
# NOTE: a later cell rebinds `train_dataset` and `train_loader`.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

# --- Test split ---
# The test frame is used whole (no label column is sliced off); same reshape and /255.
X_test = test_df.to_numpy().reshape(-1, 1, 28, 28) / 255.0
X_test_tensor = torch.from_numpy(X_test).float()

# Sequential (unshuffled) batches so predictions keep the row order of the file
test_loader = DataLoader(dataset=X_test_tensor, batch_size=64, shuffle=False)

In [67]:
# Training-time augmentation pipeline, applied in list order:
# tensor conversion, random affine jitter, random perspective warp, and
# finally Normalize((0.5,), (0.5,)), i.e. (x - 0.5) / 0.5 on the single channel.
augmentation_steps = [
    transforms.ToTensor(),
    transforms.RandomAffine(
        degrees=10,
        translate=(0.1, 0.1),
        scale=(0.9, 1.1),
        shear=10,
    ),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.Normalize((0.5,), (0.5,)),
]
train_transforms = transforms.Compose(augmentation_steps)

class CustomDataset(torch.utils.data.Dataset):
    """Dataset that serves each image as a PIL image, optionally transformed.

    Args:
        images: Indexable collection of image arrays. Each item is multiplied
            by 255, cast to uint8 and squeezed before being wrapped as a
            mode 'L' PIL image (assumes float values in [0, 1] -- TODO confirm).
        labels: Optional indexable collection aligned with ``images``.
        transforms: Optional callable applied to the PIL image.
    """

    def __init__(self, images, labels=None, transforms=None):
        self.X = images
        self.y = labels
        self.transforms = transforms

    def __len__(self):
        """Number of samples, taken from the image collection."""
        return len(self.X)

    def __getitem__(self, i):
        """Return ``(image, label)`` when labels were given, else just ``image``."""
        # Scale back to 8-bit and drop singleton axes so PIL gets a 2-D array
        pixels = (self.X[i] * 255).astype(np.uint8).squeeze()
        sample = Image.fromarray(pixels, mode='L')

        if self.transforms:
            sample = self.transforms(sample)

        if self.y is None:
            return sample
        return sample, self.y[i]

# Rebuild the training dataset on top of the numpy arrays so every sample
# goes through the augmentation pipeline when it is fetched
train_dataset = CustomDataset(images=X_train, labels=y_train, transforms=train_transforms)

# Shuffled mini-batches of 64 augmented samples
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [68]:
class DigitRecognizerCNN(nn.Module):
    """Four-block CNN classifier for single-channel 28x28 images (10 classes).

    Each block is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU. The first
    three blocks are followed by 2x2 max pooling, so a 28x28 input shrinks
    28 -> 14 -> 7 -> 3 and the last block outputs 512 maps of 3x3. The
    flattened features go through two hidden linear layers with dropout and a
    final 10-way linear layer that returns raw logits (no softmax).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor: channels 1 -> 64 -> 128 -> 256 -> 512
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(512)

        # Shared 2x2 max pooling (stride 2)
        self.pool = nn.MaxPool2d(2, 2)

        # Dropout applied before fc1 and before fc2 respectively
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.6)

        # Classifier head; fc1 expects the 512 x 3 x 3 map produced for 28x28 inputs
        self.fc1 = nn.Linear(512 * 3 * 3, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        """Map a batch of images ``(N, 1, H, W)`` to class logits ``(N, 10)``.

        Raises:
            RuntimeError: if the spatial size does not reduce to 3x3, because
                the flattened feature width then no longer matches ``fc1``.
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = F.relu(self.bn4(self.conv4(x)))  # no pooling after the last block

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 512 * 3 * 3), this can never fold a wrong-sized input into a
        # different batch size; a mismatch surfaces as a shape error in fc1.
        x = torch.flatten(x, 1)

        x = self.dropout1(x)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# Build the network, then place its parameters and buffers on the selected device
model = DigitRecognizerCNN()
model = model.to(device)

In [69]:
# Training objective: cross-entropy between the model's logits and integer class labels
loss_fn = nn.CrossEntropyLoss()

# SGD with momentum over all model parameters
optimizer = optim.SGD(params=model.parameters(), lr=1e-3, momentum=0.9)

# Step schedule: multiply the learning rate by 0.1 after every 8 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=8, gamma=0.1)

In [70]:
def train(dataloader, model, loss_fn, optimizer, device, epoch):
    """Train ``model`` for one pass over ``dataloader`` and print progress.

    Running loss and accuracy are printed on the first batch and every 100
    batches after it, followed by a summary line at the end of the epoch.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: Network to optimise; switched to training mode here.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.
        epoch: Zero-based epoch index, used only in the printed messages.

    Returns:
        None. Metrics are reported through ``print``.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    batches_seen = 0

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Forward pass and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Bookkeeping for the running statistics
        running_loss += loss.item()
        batches_seen += 1
        total += y.size(0)
        correct += (pred.argmax(dim=1) == y).sum().item()

        # Print statistics every 100 batches
        if batch % 100 == 0:
            loss_batch = running_loss / (batch + 1)
            acc_batch = 100 * correct / total
            print(f"[{epoch + 1}, {batch + 1:5d}] loss: {loss_batch:.4f}, Accuracy: {acc_batch:.2f}%")

    # An empty loader has nothing to average; report that instead of dividing by zero
    if batches_seen == 0:
        print(f"End of Epoch {epoch + 1}: no batches were produced\n")
        return

    # Average over what was actually processed. Dividing by the dataset length
    # would under-report accuracy whenever the loader skips samples
    # (drop_last=True, a subsampling sampler), and counting here also avoids
    # requiring len() on the loader or its dataset.
    epoch_loss = running_loss / batches_seen
    epoch_acc = 100 * correct / total
    print(f"End of Epoch {epoch + 1}: Avg loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%\n")

In [71]:
# Run the full training schedule: one `train` pass per epoch, then one scheduler step.
# NOTE(review): the scheduler is a StepLR with step_size=8 and is stepped once
# per epoch, so with epochs = 8 the first learning-rate decay lands only after
# the final epoch has finished -- confirm whether that is intended.
epochs = 8

for epoch in range(epochs):
    train(
        dataloader=train_loader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        device=device,
        epoch=epoch,
    )
    scheduler.step()

print("Done!")

[1,     1] loss: 2.3288, Accuracy: 6.25%
[1,   101] loss: 2.1852, Accuracy: 28.57%
[1,   201] loss: 1.8436, Accuracy: 43.78%
[1,   301] loss: 1.4701, Accuracy: 55.98%
[1,   401] loss: 1.2134, Accuracy: 63.81%
[1,   501] loss: 1.0320, Accuracy: 69.24%
[1,   601] loss: 0.9009, Accuracy: 73.18%
End of Epoch 1: Avg loss: 0.8423, Accuracy: 74.88%

[2,     1] loss: 0.1735, Accuracy: 92.19%
[2,   101] loss: 0.2120, Accuracy: 93.77%
[2,   201] loss: 0.1959, Accuracy: 94.15%
[2,   301] loss: 0.1824, Accuracy: 94.57%
[2,   401] loss: 0.1777, Accuracy: 94.69%
[2,   501] loss: 0.1707, Accuracy: 94.91%
[2,   601] loss: 0.1655, Accuracy: 95.07%
End of Epoch 2: Avg loss: 0.1625, Accuracy: 95.16%

[3,     1] loss: 0.0726, Accuracy: 98.44%
[3,   101] loss: 0.1230, Accuracy: 96.19%
[3,   201] loss: 0.1205, Accuracy: 96.37%
[3,   301] loss: 0.1213, Accuracy: 96.36%
[3,   401] loss: 0.1182, Accuracy: 96.50%
[3,   501] loss: 0.1151, Accuracy: 96.55%
[3,   601] loss: 0.1115, Accuracy: 96.66%
End of Epoch 3:

In [72]:
# Predict a label for every test image, in file order.
model.eval()  # evaluation mode: dropout off, batch norm uses running statistics
predictions = []

with torch.no_grad():
    for data in test_loader:
        # Move input data to the same device as the model
        data = data.to(device)

        # The training pipeline ends with Normalize((0.5,), (0.5,)), while the
        # test tensors were only divided by 255. Apply the same
        # (x - 0.5) / 0.5 mapping so inference sees the input scale the
        # network was trained on.
        data = (data - 0.5) / 0.5

        outputs = model(data)
        predicted = outputs.argmax(dim=1)  # index of the largest logit per row
        predictions.extend(predicted.cpu().tolist())  # plain Python ints on the CPU

# Assemble the submission table: 1-based image ids paired with the predicted labels
image_ids = range(1, len(predictions) + 1)
submission = pd.DataFrame({"ImageId": image_ids, "Label": predictions})

# Write it out without the dataframe index column
submission.to_csv('predictions.csv', index=False)