### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader 

### Loading data into pandas dataframe

In [None]:
# Directory that holds the digit-recognizer CSV files.
DATA_DIR = '../input/digit-recognizer'

# Labelled training rows and unlabelled test rows, respectively.
train_csv = f'{DATA_DIR}/train.csv'
test_csv = f'{DATA_DIR}/test.csv'

In [None]:
# Read both CSV files into DataFrames (training file first, then test file).
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_csv, test_csv))

In [None]:
# Preview the first rows of the training frame.
train_df.head()

In [None]:
# Preview the first rows of the test frame.
test_df.head()

### Building Custom Dataset

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset serving 28x28 grayscale images from a pixel DataFrame.

    Each row of ``df`` holds one flattened image (784 pixel columns). When
    ``train_data`` is true the first column is the class label and the
    remaining columns are the pixels; otherwise every column is a pixel.
    Pixel values are stored as ``uint8``, so they are expected to be integers
    in ``[0, 255]``.

    Args:
        df: DataFrame with one image per row, laid out as described above.
        transform: Callable applied to the PIL image built for each row; its
            return value is what ``__getitem__`` yields as the image.
        train_data: Whether ``df`` carries a label in its first column.
    """

    def __init__(self, df, transform, train_data=True):
        self.df = df
        self.transform = transform
        self.train_data = train_data

        # Convert the frame once up front: slicing and casting a DataFrame row
        # on every __getitem__ call is far slower than indexing an array.
        if train_data:
            self.labels = df.iloc[:, 0].to_numpy(dtype='int64')
            pixels = df.iloc[:, 1:]
        else:
            self.labels = None
            pixels = df

        # uint8 makes PIL build an 8-bit grayscale (mode 'L') image, which
        # ToTensor rescales to [0, 1]. A float32 array would instead produce a
        # mode 'F' image whose 0-255 values ToTensor passes through unscaled,
        # so a following Normalize(0.5, 0.5) would not yield [-1, 1].
        self.images = pixels.to_numpy(dtype='uint8').reshape(len(df), 28, 28)

    def __len__(self):
        """Return the number of images (rows of the DataFrame)."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for training data, else just ``image``.

        ``image`` is the result of ``transform`` applied to the PIL image of
        row ``index``; ``label`` is a scalar ``torch.long`` tensor.
        """
        image = Image.fromarray(self.images[index])
        image = self.transform(image)

        if self.train_data:
            return image, torch.tensor(self.labels[index], dtype=torch.long)
        return image

In [None]:
# Preprocessing applied to every PIL image produced by CustomDataset:
# resize to 28x28, convert to a tensor, then normalize the single channel
# with mean 0.5 and std 0.5.
# Note the trailing commas: `(0.5)` is just the float 0.5, whereas Normalize
# documents `mean` / `std` as per-channel sequences.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# The training frame carries labels in its first column; the test frame does not.
train_dataset = CustomDataset(train_df, transform=transform, train_data=True)
test_dataset = CustomDataset(test_df, transform=transform, train_data=False)

In [None]:
# Split the labelled data into train (80%) and validation (20%) subsets.
# The full dataset is rebuilt from `train_df` here instead of being read back
# from `train_dataset`, so re-running this cell never splits an already-split
# subset; the seeded generator makes the partition reproducible across runs.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

full_train_dataset = CustomDataset(train_df, transform=transform, train_data=True)
train_size = int(TRAIN_FRACTION * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
# Mini-batch size shared by every loader.
BATCH_SIZE = 32

# Only the training loader shuffles. The train and validation loaders use two
# worker processes; the test loader keeps the default worker setting.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

### Visualize one batch from the training data

In [None]:
# Pull a single (images, labels) batch from the training loader for inspection.
X, y = next(iter(train_loader))
# Display the shapes of the image batch and the label batch.
X.shape, y.shape

In [None]:
# Show the sampled training batch as a grid of images titled with their labels.
num_cols = 8
num_rows = BATCH_SIZE // num_cols

# Size the figure once at creation (width 15, height 7) rather than resizing
# it on every row; squeeze=False keeps `ax` two-dimensional even when the
# grid has a single row.
f, ax = plt.subplots(num_rows, num_cols, figsize=(15, 7), squeeze=False)

# ax.flat walks the grid row by row, so idx equals row * num_cols + col.
for idx, axis in enumerate(ax.flat):
    img = X[idx].squeeze().numpy()  # drop the channel dimension for imshow
    axis.imshow(img, cmap='gray')
    axis.set_title(y[idx].item())
    axis.axis('off')

plt.show()

### Let's build the CNN architecture

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel images.

    Two ``conv 3x3 -> ReLU -> max-pool 2x2`` stages (1 -> 16 -> 32 channels)
    feed three fully connected layers (32*7*7 -> 512 -> 128 -> ``num_classes``),
    with dropout after the first two. The output is raw class scores; no
    softmax is applied.

    Args:
        num_classes: Size of the final output layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractor: 3x3 kernels with stride 1 and padding 1 keep the
        # spatial size, so only the pooling layer shrinks the feature maps.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)

        # Parameter-free layers reused by several stages.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.relu = nn.ReLU()

        # Classifier head; fc1 expects the flattened 32 x 7 x 7 conv output.
        self.fc1 = nn.Linear(32 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, num_classes)

        # Regularisation applied after each hidden fully connected layer.
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Compute class scores for a batch ``x`` of shape (batch, channels, h, w)."""
        # Convolutional stages: conv -> ReLU -> pool.
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.relu(conv(x)))

        # Flatten everything except the batch dimension.
        x = x.reshape(len(x), -1)

        # Hidden fully connected stages: linear -> ReLU -> dropout.
        for hidden in (self.fc1, self.fc2):
            x = self.dropout(self.relu(hidden(x)))

        return self.fc3(x)

In [None]:
# Smoke-test the model on the sample batch before training.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = CNN().to(device)

# A shape check needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    out = model(X.to(device))

# Verify (not just print) that there is one score per class for every image;
# 10 is CNN's default number of classes.
assert out.shape == (X.shape[0], 10), out.shape
print(out.shape)

### Define loss function and optimizer

In [None]:
# Optimisation hyper-parameters.
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5

# Cross-entropy over the raw class scores returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

### Let's train the model

In [None]:
def train_epoch(model, data_loader, device, criterion, optimizer):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Loss callable invoked as ``criterion(output, targets)``.
        optimizer: Optimizer that updates the model after each batch.

    Returns:
        Tuple ``(accuracy, mean_loss)``: the fraction of correctly classified
        samples and the mean of the per-batch loss values.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.train()

    losses = []
    correct = 0
    total = 0

    for x, y in tqdm(data_loader):
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Softmax is monotonic, so the predicted class is simply the argmax of
        # the raw scores; detach() keeps this bookkeeping off the autograd graph.
        preds = output.detach().argmax(dim=1)
        correct += (preds == y).sum().item()
        total += preds.size(0)

    # Fail with a clear message instead of an opaque ZeroDivisionError.
    if total == 0:
        raise ValueError('data_loader produced no samples; cannot compute epoch metrics')

    return correct / total, float(np.mean(losses))

In [None]:
def val_epoch(model, data_loader, device, criterion):
    """Evaluate ``model`` on ``data_loader`` without updating its weights.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Loss callable invoked as ``criterion(output, targets)``.

    Returns:
        Tuple ``(accuracy, mean_loss)``: the fraction of correctly classified
        samples and the mean of the per-batch loss values.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()

    losses = []
    correct = 0
    total = 0

    with torch.no_grad():
        for x, y in tqdm(data_loader):
            x = x.to(device)
            y = y.to(device)

            output = model(x)
            losses.append(criterion(output, y).item())

            # Softmax is monotonic, so the argmax of the raw scores already
            # gives the predicted class.
            preds = output.argmax(dim=1)
            correct += (preds == y).sum().item()
            total += preds.size(0)

    # Fail with a clear message instead of an opaque ZeroDivisionError.
    if total == 0:
        raise ValueError('data_loader produced no samples; cannot compute epoch metrics')

    return correct / total, float(np.mean(losses))

In [None]:
# Train for a fixed number of epochs and checkpoint the weights whenever the
# validation accuracy beats the best value seen so far.
EPOCHS = 50

best_val_acc = 0

for epoch in range(EPOCHS):
    # Epoch header.
    print(f'Epoch: {epoch+1}/{EPOCHS}')
    print('-'*10)

    print('Training')
    train_acc, train_loss = train_epoch(
        model, train_loader, device, criterion, optimizer
    )

    print('Validating')
    val_acc, val_loss = val_epoch(model, val_loader, device, criterion)

    # Per-epoch summary.
    print(f'Train Loss: {train_loss}\tTrain Acc: {train_acc}')
    print(f'Val Loss: {val_loss}\tVal Acc: {val_acc}')

    # Nothing to save unless this epoch strictly improved on the best accuracy.
    if not val_acc > best_val_acc:
        continue

    best_val_acc = val_acc
    torch.save(model.state_dict(), 'best_model.pth.tar')

### Let's test the model

In [None]:
# Load best model
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a CUDA run can still be restored in a CPU-only session.
model.load_state_dict(torch.load('best_model.pth.tar', map_location=device))

In [None]:
def test_model(model, data_loader, device):
    model.eval()

    predictions_list = []

    with torch.no_grad():
        for batch_idx, x in enumerate(tqdm(data_loader)):
            x = x.to(device)  

            output = model(x)

            _, preds = torch.max(output, dim=1)

            predictions_list.append(preds.view(-1).cpu())

    return predictions_list

In [None]:
# Predict a class for every test image; the result holds one tensor per batch.
predictions_list = test_model(model, test_loader, device)
# Concatenate the per-batch predictions into a single NumPy array.
y_pred = torch.cat(predictions_list).numpy()

### Let's visualize predicted output

In [None]:
# Pull the first batch from the test loader (images only, no labels).
X_test = next(iter(test_loader))
# Display the batch shape.
X_test.shape

In [None]:
# Predict the classes of the sample test batch. eval() and no_grad() are set
# here explicitly so the cell does not depend on an earlier call having left
# the model in evaluation mode, and so no autograd graph is built.
model.eval()
with torch.no_grad():
    output = model(X_test.to(device))

# argmax over the class dimension gives the predicted label; this also avoids
# rebinding `_`, which IPython uses for the previous cell output.
preds = output.argmax(dim=1)
preds.shape

In [None]:
# Show the sampled test batch as a grid of images titled with the predicted class.
num_cols = 8
num_rows = BATCH_SIZE // num_cols

# Size the figure once at creation (width 15, height 7) rather than resizing
# it on every row; squeeze=False keeps `ax` two-dimensional even when the
# grid has a single row.
f, ax = plt.subplots(num_rows, num_cols, figsize=(15, 7), squeeze=False)
f.suptitle('Predictions')

# ax.flat walks the grid row by row, so idx equals row * num_cols + col.
for idx, axis in enumerate(ax.flat):
    img = X_test[idx].squeeze().numpy()  # drop the channel dimension for imshow
    axis.imshow(img, cmap='gray')
    axis.set_title(preds[idx].item())
    axis.axis('off')

plt.show()

Looks pretty good.

### Make submission

In [None]:
# Load the sample submission file to reuse its layout for the final submission.
submission_df = pd.read_csv('../input/digit-recognizer/sample_submission.csv')

In [None]:
# Preview the first rows of the submission template.
submission_df.head()

In [None]:
# Fill the `Label` column with the model's predictions, then write the
# submission file without the DataFrame index.
submission_df = submission_df.assign(Label=y_pred)
submission_df.to_csv('submission.csv', index=False)
print('Done!')