In [None]:
import os
import numpy as np
import pandas as pd


# Walk the input directory tree and print the full path of every file found.
for root, _, names in os.walk("../input"):
    for name in names:
        print(os.path.join(root, name))

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader,TensorDataset
from torchvision.utils import make_grid

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math 

%matplotlib inline

In [None]:
# Report whether a CUDA GPU and the cuDNN backend are usable in this kernel.
print(torch.cuda.is_available())
print(torch.backends.cudnn.enabled)

# Always bind `device` (falling back to the CPU) so that code referring to it
# does not raise NameError on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
# Read the train and test CSV files from the competition input folder.
input_folder_path = '../input/digit-recognizer'
train_df = pd.read_csv(f'{input_folder_path}/train.csv')
test_df = pd.read_csv(f'{input_folder_path}/test.csv')

In [None]:
# Column 'label' holds the targets; every column after the first is treated as
# pixel data. The test frame has no label column, so all of it is pixel data.
train_labels = train_df['label'].values
train_imgs = train_df.iloc[:, 1:].values.astype('float32')
test_imgs = test_df.iloc[:, :].values.astype('float32')

# Hold out 20% of the training rows for validation, stratified by label.
# No random_state is passed, so the split differs from run to run.
train_imgs, val_imgs, train_labels, val_labels = train_test_split(
    train_imgs,
    train_labels,
    stratify=train_labels,
    test_size=0.2,
)

In [None]:
# Turn each flat pixel row into a 28x28 image, keeping the sample axis first.
train_imgs = np.reshape(train_imgs, (len(train_imgs), 28, 28))
val_imgs = np.reshape(val_imgs, (len(val_imgs), 28, 28))
test_imgs = np.reshape(test_imgs, (len(test_imgs), 28, 28))

In [None]:
# Show training images 6-8 with their labels in slots 7-9 of a 3x3 subplot
# grid (i.e. the bottom row).
for slot in (7, 8, 9):
    sample_idx = slot - 1
    plt.subplot(3, 3, slot)
    plt.imshow(train_imgs[sample_idx].squeeze(), cmap=plt.get_cmap('gray'))
    plt.title(train_labels[sample_idx])

In [None]:
# Show test images 6-8 in slots 7-9 of a 3x3 subplot grid (the bottom row).
# The test set has no labels, so no titles are drawn.
for slot in (7, 8, 9):
    plt.subplot(3, 3, slot)
    plt.imshow(test_imgs[slot - 1].squeeze(), cmap=plt.get_cmap('gray'))

In [None]:
# Convert the image arrays to tensors, dividing by 255.0 to rescale the pixel
# values (presumably 8-bit intensities -- confirm against the data).
train_imgs_tensor = torch.tensor(train_imgs).div(255.0)
val_imgs_tensor = torch.tensor(val_imgs).div(255.0)
test_imgs_tensor = torch.tensor(test_imgs).div(255.0)

# Labels are converted as-is.
train_labels_tensor = torch.tensor(train_labels)
val_labels_tensor = torch.tensor(val_labels)

# Pair images with labels; the test images stay a bare tensor (no labels).
train_tensor = TensorDataset(train_imgs_tensor, train_labels_tensor)
val_tensor = TensorDataset(val_imgs_tensor, val_labels_tensor)

In [None]:
# Settings shared by all three loaders.
loader_options = {'batch_size': 16, 'num_workers': 2}

# Train and validation batches are shuffled; the test loader keeps the
# original row order so predictions line up with the submission rows.
train_loader = DataLoader(train_tensor, shuffle=True, **loader_options)
val_loader = DataLoader(val_tensor, shuffle=True, **loader_options)
test_loader = DataLoader(test_imgs_tensor, shuffle=False, **loader_options)

In [None]:
# Pull a single batch from the training loader and tile its first eight
# images into one row, keeping the matching labels for the title.
data, target = next(iter(train_loader))
img_grid = make_grid(data[:8].unsqueeze(1), nrow=8)
img_target_labels = target[:8].numpy()

# figure.figsize is only consulted when a figure is created, so it has to be
# set before imshow() opens the figure; setting it afterwards (as before) left
# this plot at the default size.
plt.rcParams['figure.figsize'] = (10, 2)

# make_grid returns channels-first data; imshow wants channels last.
plt.imshow(img_grid.numpy().transpose((1, 2, 0)))
plt.title(img_target_labels, size=16)
plt.show()

In [None]:
class DigitNet(nn.Module):
    """Small convolutional classifier producing 10 output scores per image.

    ``conv_block`` stacks three Conv -> BatchNorm -> ReLU stages (1->32->64->128
    channels) with a 2x2 max-pool after the second and third stage.
    ``linear_block`` is a dropout-regularised MLP (128*7*7 -> 128 -> 64 -> 10).
    The 128*7*7 flatten size matches single-channel 28x28 inputs, since the
    padded 3x3 convolutions keep the spatial size and each pool halves it.
    """

    def __init__(self):
        super().__init__()

        def conv_stage(in_channels, out_channels):
            # 3x3 same-padding convolution followed by batch norm and ReLU.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]

        def dense_stage(in_features, out_features):
            # Fully connected layer with batch norm, ReLU and trailing dropout.
            return [
                nn.Linear(in_features, out_features),
                nn.BatchNorm1d(out_features),
                nn.ReLU(inplace=True),
                nn.Dropout(0.2),
            ]

        conv_layers = [
            *conv_stage(1, 32),
            *conv_stage(32, 64),
            nn.MaxPool2d(kernel_size=2, stride=2),
            *conv_stage(64, 128),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv_block = nn.Sequential(*conv_layers)

        linear_layers = [
            nn.Dropout(p=0.2),
            *dense_stage(128 * 7 * 7, 128),
            *dense_stage(128, 64),
            nn.Linear(64, 10),
        ]
        self.linear_block = nn.Sequential(*linear_layers)

    def forward(self, x):
        """Return the raw class scores (no softmax applied) for batch ``x``."""
        features = self.conv_block(x)
        # Collapse everything except the batch axis before the dense layers.
        flat = features.view(features.size(0), -1)
        return self.linear_block(flat)

In [None]:
# Instantiate the network; the bare `model` expression makes the notebook
# display the module's layer summary as the cell output.
model = DigitNet()
model

In [None]:
# Move the model (and the loss module) to the GPU *before* building the
# optimizer: the torch.optim documentation recommends that the optimized
# parameters already live on their final device when the optimizer is created.
if torch.cuda.is_available():
    model = model.cuda()

criterion = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    criterion = criterion.cuda()

# Adam with an initial learning rate of 0.003.
optimizer = optim.Adam(params=model.parameters(), lr=0.003)

# Multiply the learning rate by 0.1 every 7 scheduler steps.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
def train_model(num_epoch):
    """Train the notebook-level ``model`` for one pass over ``train_loader``.

    Uses the notebook-level ``optimizer``, ``criterion`` and ``lr_scheduler``.
    A progress line with the current batch loss is printed every 100 batches,
    and the learning-rate scheduler is advanced once at the end of the epoch.

    Args:
        num_epoch: Epoch index; only used in the progress messages.
    """
    model.train()
    use_cuda = torch.cuda.is_available()

    for batch_idx, (data, target) in enumerate(train_loader):
        # Insert the channel axis that the model's first Conv2d(1, ...) expects.
        data = data.unsqueeze(1)

        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if (batch_idx + 1) % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                num_epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))

    # Step the scheduler only after the epoch's optimizer updates. Stepping it
    # first (as before) skips the first value of the schedule, so the decay
    # lands one epoch early, and PyTorch emits a warning about the call order.
    lr_scheduler.step()
            
def evaluate(data_loader):
    """Print the average loss and accuracy of ``model`` over ``data_loader``.

    The notebook-level ``model`` is switched to eval mode and run without
    gradient tracking. The loss is the cross-entropy summed over all samples
    and divided by ``len(data_loader.dataset)``.

    Args:
        data_loader: Loader yielding ``(images, labels)`` batches; images get a
            channel axis inserted at position 1 before being fed to the model.
    """
    model.eval()
    use_cuda = torch.cuda.is_available()
    total_loss = 0.0
    correct = 0

    # No gradients are needed for evaluation; skipping them avoids building
    # the autograd graph for every batch.
    with torch.no_grad():
        for data, target in data_loader:
            data = data.unsqueeze(1)

            if use_cuda:
                data = data.cuda()
                target = target.cuda()

            output = model(data)

            # reduction='sum' replaces the deprecated size_average=False.
            total_loss += F.cross_entropy(output, target, reduction='sum').item()

            # Predicted class = index of the largest score per sample.
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(data_loader.dataset)
    avg_loss = total_loss / num_samples

    print('\nAverage Val Loss: {:.4f}, Val Accuracy: {}/{} ({:.3f}%)\n'.format(
        avg_loss, correct, num_samples,
        100. * correct / num_samples))

In [None]:
num_epochs = 10

# Each epoch: one training pass, then a validation report.
for epoch in range(num_epochs):
    train_model(epoch)
    evaluate(val_loader)

In [None]:
def make_predictions(data_loader):
    """Return the predicted class index for every sample in ``data_loader``.

    The notebook-level ``model`` is switched to eval mode and run without
    gradient tracking.

    Args:
        data_loader: Loader yielding batches of images only (no labels); a
            channel axis is inserted at position 1 before the forward pass.

    Returns:
        A CPU ``LongTensor`` of shape ``(num_samples, 1)`` holding the argmax
        class per sample, in loader order. An empty ``LongTensor`` is returned
        when the loader yields no batches.
    """
    model.eval()
    use_cuda = torch.cuda.is_available()
    batch_preds = []

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for data in data_loader:
            data = data.unsqueeze(1)

            if use_cuda:
                data = data.cuda()

            output = model(data)
            batch_preds.append(output.cpu().max(1, keepdim=True)[1])

    if not batch_preds:
        return torch.LongTensor()

    # Concatenate once instead of growing a tensor on every iteration.
    return torch.cat(batch_preds, dim=0)

In [None]:
# Predict a digit for every test image.
test_set_preds = make_predictions(test_loader)

# Start from the sample submission and overwrite its 'Label' column with the
# predictions, flattened to one dimension.
submission_df = pd.read_csv("../input/digit-recognizer/sample_submission.csv").assign(
    Label=test_set_preds.numpy().squeeze()
)
submission_df.head()