In [1]:
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [2]:
torch.manual_seed(42)
np.random.seed(42)
torch.cuda.manual_seed_all(42)

In [3]:
dftrain = pd.read_csv("../input/digit-recognizer/train.csv")
dftest = pd.read_csv("../input/digit-recognizer/test.csv")
sub = pd.read_csv('../input/digit-recognizer/sample_submission.csv')

In [4]:
class CustomImageDataset(Dataset):
    def __init__(self, df, transform=None, train=True):
        if train:
            self.labels = df.iloc[:,0]
            self.imgs = df.iloc[:,1:]
        else:
            self.imgs = df.iloc[:,:]
        self.transform = transform

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        image = torch.tensor(self.imgs.iloc[idx,:])
        image = torch.reshape(image, (1, 28, 28) )
        label = torch.tensor(self.labels[idx])
        return image, label

In [5]:
class CustomTestDataset(Dataset):
    def __init__(self, df, transform=None):
        self.imgs = df.iloc[:,:]
        self.transform = transform

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        image = torch.tensor(self.imgs.iloc[idx,:])
        image = torch.reshape(image, (1, 28, 28) )
        return image

In [6]:
train_dataloader = DataLoader(dataset=CustomImageDataset(dftrain), batch_size=512, shuffle=True)
test_dataloader = DataLoader(dataset=CustomTestDataset(dftest), batch_size=512)

In [7]:
class net(nn.Module):
    # Constructor
    def __init__(self):
        super(net, self).__init__()
        self.conv1_1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding='same')
        self.conv1_2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding='same')
        

        self.conv2_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding='same')
        self.conv2_2 = nn.Conv2d(in_channels=256, out_channels=64, kernel_size=3, padding='same')
        
        self.bn1 = nn.BatchNorm2d(128)
        self.bn2 = nn.BatchNorm2d(64)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(3136, 1024)
        self.fc2 = nn.Linear(1024, 10)
        
        self.relu = nn.ReLU()
        
        self.dropout = nn.Dropout(p=0.2)
        
    # Prediction
    def forward(self, x):   
        y = self.conv1_1(x)
        y = self.relu(y)
        y = self.conv1_2(y)
        y = self.relu(y)
        
        y = self.pool(y)
        
        y = self.bn1(y)
        
        y = self.conv2_1(y)
        y = self.relu(y)
        y = self.conv2_2(y)
        y = self.relu(y)
        
        y = self.pool(y)
        y = self.bn2(y)
        
        y = y.reshape(y.shape[0], -1)
        
        y = self.fc1(y)
        y = self.relu(y)
        
        y = self.dropout(y)
        
        y = self.fc2(y)
        y = self.relu(y)
        
        return y

In [8]:
device = torch.device('cuda')
model = net()
model = model.to(device)

In [9]:
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, verbose=True)
criterion = nn.CrossEntropyLoss()

In [10]:
n_epochs = 12
loss_list = []
def train_model(n_epochs):
    prev = 1000
    for epoch in range(n_epochs):
        l = 0
        for x, y in train_dataloader:
            x = x.to(device, dtype=torch.float32)
            y = y.to(device)
            optimizer.zero_grad()
            z = model(x)
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            l += loss
    
        loss_list.append(l)
        print(l)
        if (l < prev):
            prev = l
            torch.save(model, './checkpoint.pth')
        else:
            scheduler.step(loss)

train_model(n_epochs)

tensor(16.6495, device='cuda:0', grad_fn=<AddBackward0>)
tensor(3.0476, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.6377, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.1888, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.7720, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.5743, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3754, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.5405, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.6209, device='cuda:0', grad_fn=<AddBackward0>)
tensor(3.2575, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.9068, device='cuda:0', grad_fn=<AddBackward0>)
Epoch     4: reducing learning rate of group 0 to 1.0000e-04.
tensor(0.3466, device='cuda:0', grad_fn=<AddBackward0>)


In [11]:
torch.save(model, './checkpoint.pth')

In [12]:
m = torch.load('./checkpoint.pth')

In [13]:
pred = torch.tensor([])
for x in test_dataloader:
    x = x.to(device, dtype=torch.float32)
    pred = torch.cat((pred, np.argmax(m(x).detach().cpu(), axis = 1)))

In [14]:
pred.shape

torch.Size([28000])

In [15]:
sub['Label'] = pred.type(torch.IntTensor)
sub.to_csv('submission.csv', index=False)

In [16]:
sub.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
