In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch as t
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision as tv
import torchvision.datasets as datasets 
import torchvision.transforms as transforms

# Loading trained model

In [40]:
class Net(nn.Module):
    """Small CNN classifier for single-channel 28x28 images with 10 classes.

    Layout: 3x3 convolution (1 -> 10 channels, no padding), 2x2 max-pooling,
    a 50-unit linear layer followed by ReLU, and a 10-unit linear output
    layer. ``forward`` returns raw logits; no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # input image: 1 x 28 x 28
        self.conv1 = nn.Conv2d(1, 10, 3)
        # after the 3x3 convolution: 10 x 26 x 26
        self.pool = nn.MaxPool2d(2, 2)
        # after 2x2 pooling: 10 x 13 x 13, flattened before the linear layers
        self.layer1 = nn.Linear(13 * 13 * 10, 50)
        self.layer2 = nn.Linear(50, 10)

    def forward(self, batch_inputs):
        """Compute class logits for a batch of images.

        Args:
            batch_inputs: tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding unnormalised class scores.
        """
        y = self.pool(self.conv1(batch_inputs))
        y = t.flatten(y, 1)  # keep the batch dimension, flatten the rest
        y = F.relu(self.layer1(y))
        return self.layer2(y)

    def fit_batch(self, train_load, test_loader, loss_func, optimizer):
        """Train for one pass over ``train_load`` and validate on ``test_loader``.

        Args:
            train_load: iterable of ``(inputs, targets)`` training batches.
            test_loader: sized iterable of ``(inputs, targets)`` validation
                batches. It must contain at least one batch, because the
                summed loss is divided by ``len(test_loader)``.
            loss_func: callable ``loss_func(predictions, targets)`` returning
                a scalar loss tensor.
            optimizer: optimizer that updates this model's parameters.

        Returns:
            The mean of the per-batch validation losses (every batch carries
            the same weight). The value is also printed.
        """
        self.train()
        for batch_x, batch_y in train_load:
            # Clear gradients *before* the backward pass so that gradients
            # left on the parameters by earlier work cannot leak into the
            # first update of this call.
            optimizer.zero_grad()
            loss = loss_func(self(batch_x), batch_y)
            loss.backward()
            optimizer.step()

        total_loss = 0
        self.eval()
        with t.no_grad():
            for batch_x, batch_y in test_loader:
                total_loss += loss_func(self(batch_x), batch_y)
        total_loss /= len(test_loader)
        print(f"Validation loss: {total_loss}")
        return total_loss

In [41]:
# Rebuild the architecture and restore the trained weights.
# map_location="cpu" lets a checkpoint that holds GPU tensors load on a
# CPU-only machine.
# NOTE(review): t.load unpickles the file, so only load checkpoints you trust
# (recent PyTorch releases accept weights_only=True for plain state dicts).
model = Net()
model.eval()  # this notebook only runs inference
model.load_state_dict(t.load("./handwritten_model.pth", map_location="cpu"))

<All keys matched successfully>

# Loading the Kaggle test set

In [59]:
# Directory (relative to this notebook) holding the Kaggle digit-recognizer
# files; the trailing slash is required because file names are appended to it.
dataset_path = "../../../datasets/kaggle-digit-recognizer-comp/"

In [62]:
# Read the competition's test pixels and divide every value by 255
# in a single step, then preview the first rows.
df = pd.read_csv(dataset_path + 'test.csv') / 255
df.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
# Turn the pixel table into a float32 tensor, then fold every row into a
# 1 x 28 x 28 image (batch size is inferred from the number of rows).
X = t.tensor(df.values, dtype=t.float32)
X = X.view(-1, 1, 28, 28)
X.shape

torch.Size([28000, 1, 28, 28])

In [64]:
# Inference only: switch to eval mode and disable autograd so no computation
# graph is kept for the whole test set. Call the module itself rather than
# .forward() so that any registered hooks run.
model.eval()
with t.no_grad():
    preds = model(X)

In [65]:
# Reduce the per-image class scores to the index of the largest one, i.e.
# the predicted digit. This overwrites `preds`, so re-run the forward-pass
# cell before re-running this one.
preds = preds.argmax(dim=1)

In [66]:
# Display the predicted class index for each test image.
preds

tensor([2, 0, 9,  ..., 3, 9, 2])

In [68]:
# Write the predictions in the Kaggle submission layout: a header line
# `ImageId,Label` followed by one row per image, with ImageId counting
# from 1, e.g.
#   1,2
#   2,3
csv_pred = pd.DataFrame(
    {'Label': preds.numpy()},
    index=range(1, len(preds) + 1),
)
csv_pred.index.name = 'ImageId'
csv_pred.to_csv(dataset_path + 'submission.csv')