In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from  torchvision import transforms
from torch.utils.data import DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

import tqdm.notebook as t
import os

In [2]:
torch.manual_seed(17)

<torch._C.Generator at 0x7ff0699b4310>

In [3]:
DEVICE = torch.device('cuda')
BATCH_SIZE = 8
EPOCH = 30

In [4]:
TRAIN_DATA = pd.read_csv('../input/digit-recognizer/train.csv')
TEST_DATA = pd.read_csv('../input/digit-recognizer/test.csv')
Y_TRAIN_DATA = TRAIN_DATA.pop('label')

In [5]:
one_hot_encoder = OneHotEncoder(sparse=False)
y_encoded = one_hot_encoder.fit_transform(Y_TRAIN_DATA.to_numpy().reshape(-1,1))

In [6]:
train_data, val_data, y_train, y_val = train_test_split(TRAIN_DATA.to_numpy(), y_encoded, train_size=0.8, random_state=1)

In [7]:
class ImageTrainDataset():
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        data = self.data[idx]
        label = self.labels[idx]
        
        data = data/255.0
        data = np.reshape(data, (1, 28,28))
        
        data = torch.tensor(data).type(torch.float)
        label = torch.tensor(label, dtype=torch.float)
        
        return data, label
        
class ImageTestDataset():
    def __init__(self, data):
        self.data = data
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        data = self.data[idx]
        
        data = data/255.0
        data = np.reshape(data, (1, 28,28))
        
        data = torch.tensor(data).type(torch.float)
        
        return data

In [8]:
train_dataset = ImageTrainDataset(train_data, y_train)
val_dataset = ImageTrainDataset(val_data, y_val)
test_dataset = ImageTestDataset(TEST_DATA.to_numpy())

In [9]:
train_dl = DataLoader(train_dataset, BATCH_SIZE, pin_memory=True, shuffle=True, num_workers=os.cpu_count())
val_dl =DataLoader(val_dataset, BATCH_SIZE, pin_memory=True, num_workers=os.cpu_count())
test_dl =DataLoader(test_dataset, BATCH_SIZE, pin_memory=True, num_workers=os.cpu_count())

In [10]:
class DigitRecognizer(nn.Module):
    def __init__(self, num_channels):
        super(DigitRecognizer, self).__init__()
        
        self.loss_fn = nn.BCELoss(reduction='mean')
        
        self.tranform1 = nn.Sequential(
            transforms.RandomRotation(70)
        )
        
        self.conv1 = nn.Sequential(
            nn.Conv2d(num_channels, 128, kernel_size=3, padding=2, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, padding=1, stride=1),
            nn.Dropout2d(0.1)
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=2, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, padding=1, stride=1),
            nn.Dropout2d(0.1)
        )
        
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=2, stride=1),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        )
        
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=2, stride=1),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        )
        
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=2, stride=1),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        )

        self.flatten1 = nn.Flatten()
        self.linear1 = nn.Linear(184832,10)
        self.sigmoid1 = nn.Sigmoid()
        
    def forward(self, x, y):
        
        output = x
        output = self.tranform1(output)
        output = self.conv1(output)
        output = self.conv2(output)
        output = self.conv3(output)
        output = self.conv4(output)
        output = self.conv5(output)

        output = self.flatten1(output)
        output = self.linear1(output)
        output = self.sigmoid1(output)

        loss = self.loss_fn(output, y)
        
        return loss, output
    
    def predict(self, x):
        
        output = x
        
        output = self.conv1(output)
        output = self.conv2(output)
        output = self.conv3(output)
        output = self.conv4(output)
        output = self.conv5(output)
            
        output = self.flatten1(output)
        output = self.linear1(output)
        output = self.sigmoid1(output)

        return np.argmax(output.detach().numpy(), axis=1)
    
    def train_using_train_dl(self, optimizer, train_dl, device):
        
        self.train()
        
        losses = []
        acc = []
        
        total = len(train_dl)

        for idx , data in enumerate(train_dl):
            image, label = data
            image = image.to(device)
            label = label.to(device)
            
            optimizer.zero_grad()
            
            loss, output = self(image, label)
            losses.append(loss.item())
            loss.backward()
            
            optimizer.step()
            
            acc.append(accuracy_score(np.argmax(label.cpu(), axis=1), np.argmax(output.cpu().detach().numpy(), axis=1)))
            
            print(f'{idx} / {total} - Loss : {sum(losses) / len(losses):0.4f}, Accuracy : {sum(acc) / len(acc):0.4f}', end='\r')
            
        return sum(losses) / len(losses) , sum(acc) / len(acc)
    
    def evaluate_using_val_dl(self, val_dl, device):
        
        self.eval()
        
        losses = []
        acc = []
        total = len(val_dl)
        
        with torch.no_grad():
            
            for idx, data in enumerate(val_dl):
                image, label = data
                image = image.to(device)
                label = label.to(device)

                loss, output = self(image, label)
                losses.append(loss.item())

                acc.append(accuracy_score(np.argmax(label.cpu(), axis=1), np.argmax(output.cpu().detach().numpy(), axis=1)))

                print(f'{idx} / {total} - Val_Loss : {sum(losses) / len(losses):0.4f}, Val_Accuracy : {sum(acc) / len(acc):0.4f}', end='\r')
                
        return sum(losses) / len(losses) , sum(acc) / len(acc)

In [11]:
model = DigitRecognizer(num_channels=1)
model.to(DEVICE)

DigitRecognizer(
  (loss_fn): BCELoss()
  (tranform1): Sequential(
    (0): RandomRotation(degrees=[-70.0, 70.0], interpolation=nearest, expand=False, fill=0)
  )
  (conv1): Sequential(
    (0): Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
    (3): Dropout2d(p=0.1, inplace=False)
  )
  (conv2): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
    (3): Dropout2d(p=0.1, inplace=False)
  )
  (conv3): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): LeakyReLU(ne

In [12]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, eps=0.1)

In [13]:
def train_model(epochs, model, optimizer, train_dl, val_dl, device):
    
    history = {
                'train_loss' : [],
                'val_loss' : [],
                'train_accuracy' : [],
                'val_accuracy' : []
              }
    
    for epoch in range(epochs):
        print(f'EPOCH : {epoch + 1} / {epochs}')
        
        train_loss, train_acc = model.train_using_train_dl(optimizer, train_dl, device)
        val_loss, val_acc = model.evaluate_using_val_dl(val_dl, device)
        
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_accuracy'].append(train_acc)
        history['val_accuracy'].append(val_acc)
        
        print(f'Loss : {train_loss:0.4f}, Accuracy : {train_acc:0.4f}, Val_Loss : {val_loss:0.4f}, Val_Accuracy : {val_acc:0.4f}')

    return history
        
        

In [14]:
train_model(EPOCH, model, optimizer, train_dl, val_dl, DEVICE)

EPOCH : 1 / 30
Loss : 0.2115, Accuracy : 0.5600, Val_Loss : 0.0980, Val_Accuracy : 0.8536
EPOCH : 2 / 30
Loss : 0.0780, Accuracy : 0.8915, Val_Loss : 0.0567, Val_Accuracy : 0.9282
EPOCH : 3 / 30
Loss : 0.0511, Accuracy : 0.9346, Val_Loss : 0.0468, Val_Accuracy : 0.9442
EPOCH : 4 / 30
Loss : 0.0422, Accuracy : 0.9474, Val_Loss : 0.0347, Val_Accuracy : 0.9579
EPOCH : 5 / 30
Loss : 0.0379, Accuracy : 0.9530, Val_Loss : 0.0327, Val_Accuracy : 0.9602
EPOCH : 6 / 30
Loss : 0.0346, Accuracy : 0.9562, Val_Loss : 0.0296, Val_Accuracy : 0.9617
EPOCH : 7 / 30
Loss : 0.0313, Accuracy : 0.9613, Val_Loss : 0.0281, Val_Accuracy : 0.9652
EPOCH : 8 / 30
Loss : 0.0294, Accuracy : 0.9642, Val_Loss : 0.0255, Val_Accuracy : 0.9677
EPOCH : 9 / 30
Loss : 0.0276, Accuracy : 0.9663, Val_Loss : 0.0243, Val_Accuracy : 0.9704
EPOCH : 10 / 30
Loss : 0.0266, Accuracy : 0.9668, Val_Loss : 0.0247, Val_Accuracy : 0.9675
EPOCH : 11 / 30
Loss : 0.0256, Accuracy : 0.9687, Val_Loss : 0.0236, Val_Accuracy : 0.9693
EPOCH : 

{'train_loss': [0.21153259943461133,
  0.07804063595631824,
  0.05113654467149727,
  0.04215784822887231,
  0.03787253882551644,
  0.034556994927725536,
  0.031312518204670466,
  0.029378620238761262,
  0.027595276684206468,
  0.026594411962976752,
  0.025563416975578766,
  0.02421474423087992,
  0.023666126965943564,
  0.022748281942823224,
  0.02169014657851048,
  0.021503553675988985,
  0.020854933355963327,
  0.019830585428259967,
  0.019688504732411752,
  0.019538256003233116,
  0.01866907519946662,
  0.017822591328818124,
  0.018137061064971066,
  0.01779576699114668,
  0.016944813246194654,
  0.01714754890063367,
  0.016412455520010208,
  0.016214311619894162,
  0.01611991016185736,
  0.01609204369589168],
 'val_loss': [0.09802711584028743,
  0.05667421108666098,
  0.04680197139891485,
  0.03472160073274392,
  0.032685732798251724,
  0.029584855395008897,
  0.028065186915087647,
  0.025462846846923987,
  0.024323509024966154,
  0.024714145743277013,
  0.023556892422271804,
  0.0

In [15]:
def create_submission(model, test_dl):
    model.to('cpu')
    submission = {'ImageId': [],
                'Label' : []}
    
    for data in t.tqdm_notebook(test_dl):
        pred = model.predict(data)
        submission['Label'].extend(pred)
    submission['ImageId'] = list(range(1,len(submission['Label']) + 1))
    return submission

In [16]:
submission = create_submission(model, test_dl)
df_submission = pd.DataFrame(submission)
df_submission.to_csv('submission.csv', index=False)

  0%|          | 0/3500 [00:00<?, ?it/s]

In [17]:
df_submission

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9
