In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install torch-lr-finder
import torch
from PIL import Image
import os
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader
import torchvision.models as models
import albumentations as A
import cv2
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_lr_finder import *
from torch_lr_finder import LRFinder
import matplotlib.pyplot as plt
import torchvision.models as models
from sklearn.model_selection import train_test_split
from numpy import asarray 

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
        os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/resnet18/resnet18.pth' '/root/.cache/torch/hub/checkpoints/resnet18-333f7ec4.pth'

In [None]:
# Validation / test pipeline: normalisation only, no random augmentation.
v_tfms = transforms.Compose(
    [transforms.Normalize(mean=(0.1307,), std=(0.3081,))]
)

# Training pipeline: a small random rotation, then a random affine
# (rotation up to 16 degrees plus up to 10% translation), then the same
# normalisation as the validation pipeline.
tfms = transforms.Compose(
    [
        transforms.RandomRotation(degrees=(-10, 10)),
        transforms.RandomAffine(degrees=(-16, 16), translate=(.1, .1)),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [None]:
# Load the digit-recognizer training table into a DataFrame.
df = pd.read_csv('../input/digit-recognizer/train.csv')

In [None]:
df.info()

In [None]:
df.head()

In [None]:
df.iloc[0,1:]

In [None]:
# Features are every column after the first; targets come from the 'label' column.
X, y = df.iloc[:, 1:].values, df['label'].values

# Hold out 20% for validation. A fixed seed keeps the split identical across
# "Restart & Run All", and stratifying on the label keeps the class
# proportions the same in both partitions.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
class NumDataset(Dataset):
    """Dataset over flattened 28x28 images stored as rows of a 2-D array.

    Args:
        DataFrame: indexable collection whose ``[index]`` item is a flat
            array of 784 pixel values (reshaped to 28x28 on access).
        Labels: indexable collection of targets aligned with ``DataFrame``;
            unused (may be ``None``) when ``test=True``.
        Transform: callable applied to the image tensor in training mode.
        test: when true, items are image tensors only (no label).
        valid: when true, items are ``(image, label)`` without augmentation.

    Note:
        In ``valid`` and ``test`` mode the notebook-level ``v_tfms`` pipeline
        is applied instead of ``Transform``.
    """

    def __init__(self, DataFrame, Labels, Transform, test=False, valid=False):
        self.df = DataFrame
        self.transform = Transform
        self.lbs = Labels
        self.test = test
        self.valid = valid

    def __len__(self):
        """Number of samples (rows) in the underlying array."""
        return len(self.df)

    def __getitem__(self, index):
        """Return the sample at ``index`` as a ``(1, 28, 28)`` float32 tensor.

        Pixels are scaled from the raw 0-255 range to [0, 1] before any
        transform runs, because the normalisation constants used by the
        pipelines (mean 0.1307, std 0.3081) are expressed on that scale.
        """
        pixels = np.asarray(self.df[index], dtype=np.float32).reshape(1, 28, 28)
        image = torch.from_numpy(pixels / 255.0)

        if self.valid:
            return v_tfms(image), self.lbs[index]
        if self.test:
            return v_tfms(image)
        return self.transform(image), self.lbs[index]
            


In [None]:
# Training split: augmented samples, reshuffled every epoch.
num_trainset = NumDataset(X_train, Y_train, tfms)
num_trainloader = DataLoader(num_trainset, batch_size=64, shuffle=True, num_workers=4)

# Validation split: hand over the augmentation-free pipeline (so the call
# site says what is actually applied) and keep a fixed order -- shuffling
# has no benefit when the data is only evaluated.
num_validset = NumDataset(X_valid, Y_valid, v_tfms, valid=True)
num_validloader = DataLoader(num_validset, batch_size=64, shuffle=False, num_workers=4)

In [None]:
def visualize(image, label):
    """Draw ``image`` in its own small figure with ``label`` as the x-axis text."""
    plt.figure(figsize=(4, 3))
    plt.xlabel(label)
    plt.imshow(image)


# Preview the first ten training samples. Each sample is fetched exactly once
# so the image shown and its label come from the same (randomly augmented) draw.
for i in range(10):
    sample_image, sample_label = num_trainset[i]
    visualize(sample_image[-1], sample_label)

* The transforms should help: slight rotations and affine transformations of the training data should make the model more robust.
* The dataset seems to be working. Let's load some models.

In [None]:
def train_(epocs, loader, model, valid_set, early_stop, optimizer=None, criterion=None):
    """Train ``model``, validating after every epoch, with checkpointing and early stopping.

    Args:
        epocs: maximum number of epochs to run.
        loader: iterable of ``(inputs, labels)`` training batches.
        model: ``torch.nn.Module`` to train; batches are moved to the device
            its parameters live on.
        valid_set: iterable of ``(inputs, labels)`` validation batches.
        early_stop: stop once this many consecutive epochs fail to improve
            validation accuracy.
        optimizer: optimizer to step. Defaults to the notebook-level
            ``optimizer`` variable when omitted.
        criterion: loss function. Defaults to the notebook-level
            ``criterion`` variable when omitted.

    Side effects:
        Prints per-epoch loss/accuracy and writes ``model.state_dict()`` to
        ``'best_model.pt'`` whenever validation accuracy strictly improves.

    Returns:
        None.
    """
    # Keep the original call signature working: fall back to the globals the
    # notebook defines when no explicit optimizer / criterion is supplied.
    if optimizer is None:
        optimizer = globals()["optimizer"]
    if criterion is None:
        criterion = globals()["criterion"]

    # Follow the model's device instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    best_v_acc = 0.0
    bad_rds = 0
    for epoch in range(epocs):  # loop over the dataset multiple times
        # ---- training pass ----
        print("Training...")
        model.train()
        correct = 0
        total = 0
        running_loss = 0.0
        n_batches = 0
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            correct += (outputs.max(dim=1)[1] == labels).sum().item()
            total += labels.size(0)
            running_loss += loss.item()
            n_batches += 1

        if n_batches:
            print('[epoc: %d, %5d] training loss: %.3f' %
                  (epoch + 1, n_batches, running_loss / n_batches))
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        print("Training Accuracy = {}".format(100 * correct / max(total, 1)))

        # ---- validation pass ----
        print("Validating...")
        model.eval()
        correct = 0
        total = 0
        running_loss = 0.0
        n_batches = 0
        with torch.no_grad():
            for inputs, labels in valid_set:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                correct += (outputs.max(dim=1)[1] == labels).sum().item()
                total += labels.size(0)
                running_loss += loss.item()
                n_batches += 1

        if n_batches:
            # Average over the number of *validation* batches, not training ones.
            print('[epoc: %d, %5d] valid loss: %.3f' %
                  (epoch + 1, n_batches, running_loss / n_batches))

        valid_acc = 100 * correct / max(total, 1)
        if valid_acc > best_v_acc:
            best_v_acc = valid_acc
            print("BEST MODEL SAVED")
            torch.save(model.state_dict(), 'best_model.pt')
            bad_rds = 0
        else:
            bad_rds += 1
        print("\nvalid Accuracy = {}".format(valid_acc))

        if bad_rds == early_stop:
            print("EARLY STOPPING AFTER ", bad_rds, " BAD ROUNDS")
            return

In [None]:
# Build a ResNet-18 and request its pretrained weights.
model_ft = models.resnet18(pretrained=True)

In [None]:
# Replace the first convolution with a freshly initialised one that accepts a
# single input channel (7x7 kernel, stride 2, padding 3, no bias).
model_ft.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

# Replace the classifier head with a 10-way linear layer of matching input width.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=10)

model_ft = model_ft.to("cuda")

In [None]:
# Loss used by the LR range test and by train_ (read there as a notebook-level global).
criterion = nn.CrossEntropyLoss()

In [None]:

# Learning-rate range test: start from a tiny lr and sweep up to end_lr over
# num_iter iterations, plot the result, then call reset() on the finder
# (presumably restoring model/optimizer state -- see the torch-lr-finder docs).
optimizer = optim.SGD(params=model_ft.parameters(), lr=1e-7, momentum=0.9)
lr_finder = LRFinder(model=model_ft, optimizer=optimizer, criterion=criterion)
lr_finder.range_test(train_loader=num_trainloader, end_lr=100, num_iter=100)
lr_finder.plot()
lr_finder.reset()

In [None]:
# Re-create the optimizer for the real training run with lr=1e-2
# (presumably chosen from the range-test plot -- confirm).
optimizer = optim.SGD(model_ft.parameters(), lr=1e-2, momentum=0.9)

In [None]:
# Up to 20 epochs, stopping after 3 consecutive epochs without a validation gain.
train_(
    epocs=20,
    loader=num_trainloader,
    model=model_ft,
    valid_set=num_validloader,
    early_stop=3,
)

In [None]:
# Restore the weights stored in 'best_model.pt' and switch to evaluation mode.
model_ft.load_state_dict(torch.load('best_model.pt'))
model_ft.eval()

In [None]:
# Test table to predict on, plus the sample submission used as the output template.
# NOTE(review): the two paths use different roots ('../input' vs '/kaggle/input').
test_df = pd.read_csv('../input/digit-recognizer/test.csv')
sample_sub = pd.read_csv("/kaggle/input/digit-recognizer/sample_submission.csv")

In [None]:
test_df.head()

In [None]:
# All columns of the test table are used as features.
X_test = test_df.to_numpy()
len(X_test)

In [None]:
# Unlabelled test dataset; the loader keeps the original row order.
test_set = NumDataset(DataFrame=X_test, Labels=None, Transform=v_tfms, test=True)
test_loader = DataLoader(
    dataset=test_set,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)

In [None]:
len(test_loader)

In [None]:
# Run the model over the test loader without tracking gradients and collect
# the arg-max class of every row, in loader order.
collected = []
with torch.no_grad():
    for batch in test_loader:
        scores = model_ft(batch.to('cuda'))
        collected.extend(scores.cpu().numpy().argmax(1).tolist())

test_preds = np.array(collected)

In [None]:
test_preds


In [None]:
# Replace the template's 'Label' column with the predicted classes.
sample_sub['Label'] = test_preds

In [None]:
sample_sub

In [None]:
# Write the submission file to the working directory, without the index column.
sample_sub.to_csv('submission.csv', index=False)