In [None]:
!pip install torch-lr-finder

In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import torch
from torch import nn
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import timm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

import albumentations as A
from albumentations.pytorch import ToTensorV2

from torch_lr_finder import *
from torch_lr_finder import LRFinder

from tqdm import tqdm

In [None]:
# Experiment settings gathered in a single mapping. Not every key is consumed
# by the cells of this notebook; the unused ones are kept as-is.
CFG = dict(
    # optimisation
    lr=6e-04,
    min_lr=1e-4,
    weight_decay=1e-6,
    # runtime
    num_workers=4,
    accum_iter=2,
    verbose_step=1,
    # first CUDA device when one is available, otherwise the CPU
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    # problem / schedule
    target_size=10,
    img_size=28,
    epochs=50,
    scheduler='ReduceLROnPlateau',
    fold_num=5,
    seed=2003,
    batch_size=64,
)

In [None]:
# Read the two competition tables: `data` (rows whose first column is later
# used as the label) and `test_data` (rows treated as pixels only).
data, test_data = map(
    pd.read_csv,
    (
        '../input/digit-recognizer/train.csv',
        '../input/digit-recognizer/test.csv',
    ),
)

In [None]:
# Training-time augmentation pipeline: one random shift/scale/rotate step that
# fires with probability 0.75. No tensor conversion is done here; the dataset
# class converts the (possibly augmented) array to a tensor itself.
train_transform = A.Compose([
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=5,
        p=0.75,
    ),
])

In [None]:
class dataset(Dataset):
    """Map-style dataset over a table of flattened 28x28 digit images.

    Every row of ``df`` is one image stored as 784 pixel columns. For labelled
    data (``is_test=False``) column 0 holds the label and the pixels start at
    column 1; for test data every column is a pixel.

    Args:
        df: pandas DataFrame with one image per row.
        transform: optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``"image"`` entry (the albumentations
            calling convention). ``None`` disables augmentation.
        is_test: when True the rows carry no label and ``__getitem__`` returns
            only the image tensor.

    ``__getitem__`` returns a float32 tensor of shape ``(3, 28, 28)`` (the
    grayscale plane repeated three times), plus the label for labelled data.
    """

    def __init__(self, df, transform=None, is_test=False):
        self.df = df
        self.transform = transform
        self.is_test = is_test
        # Index of the first pixel column (column 0 is the label when present).
        self.iloc_param = 0 if is_test else 1
        # Extract the arrays from pandas once: a DataFrame row lookup per sample
        # is much slower than indexing a NumPy array.
        self._pixels = df.iloc[:, self.iloc_param:].to_numpy()
        self._labels = None if is_test else df.iloc[:, 0].to_numpy()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Scale to [0, 1] (assumes raw intensities are in 0..255) and cast to
        # float32 *before* augmenting: albumentations documents uint8/float32
        # inputs, and float32 is what the tensor ends up as anyway.
        image = (self._pixels[idx].reshape((28, 28)) / 255.).astype(np.float32)

        if self.transform is not None:
            image = self.transform(image=image)["image"]

        image = torch.from_numpy(image).float()
        # Replicate the single grayscale plane into three identical channels.
        image = torch.stack([image, image, image], dim=0)

        if self.is_test:
            return image
        return image, self._labels[idx]
        

In [None]:
# Hold-out split: the first fold of a stratified K-fold serves as validation.
# The fold count comes from CFG instead of a duplicated literal.
skf = StratifiedKFold(n_splits=CFG['fold_num'])
splitter = skf.split(data.iloc[:,1:], data.iloc[:,0])
gen = next(splitter)
train_idx, val_idx = gen
train, val = data.iloc[train_idx, :], data.iloc[val_idx, :]

#datasets (indices are reset so positional and label-based row lookups agree)
train_dataset = dataset(train.reset_index(drop=True), train_transform)
valid_dataest = dataset(val.reset_index(drop=True), None)

#loaders
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=CFG['batch_size'],
    shuffle=True,
    num_workers=CFG['num_workers'],
)
# Validation metrics do not depend on sample order, so no shuffling is needed.
validation_loader = DataLoader(
    dataset=valid_dataest,
    batch_size=CFG['batch_size'],
    shuffle=False,
    num_workers=CFG['num_workers'],
)


#testing- pull one training batch as a sanity check
test = iter(train_loader)
n = next(test)
print(n[0].shape) # (batch_size, channels, img_x, img_y)

#print an example: label and first channel of the 4th sample in the batch
print(n[1][3])
plt.imshow(n[0][3,0,:,:])

In [None]:
class ResNet50(nn.Module):
    """timm backbone with its ``fc`` layer replaced by a new linear head.

    The head produces ``CFG['target_size']`` raw scores. No softmax is applied
    inside the module, since the notebook trains with ``nn.CrossEntropyLoss``.

    Args:
        model_name: architecture name handed to ``timm.create_model``; the
            created model must expose an ``fc`` attribute.
        pretrained: forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Keep the head's input width, change only its output size.
        backbone.fc = nn.Linear(backbone.fc.in_features, CFG['target_size'])
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and return its output unchanged."""
        return self.model(x)

In [None]:
# Training objects: the network (placed on the configured device), the
# cross-entropy loss, and an Adam optimizer using the configured learning rate.
model = ResNet50('resnet50', pretrained=True).to(CFG['device'])
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=CFG['lr'])

In [None]:
# Learning-rate range test with torch-lr-finder, run on a throw-away Adam
# optimizer so the training optimizer defined earlier is not touched.
opt = torch.optim.Adam(params=model.parameters(), lr=1e-5)
lr_finder = LRFinder(model=model, optimizer=opt, criterion=criterion)
lr_finder.range_test(train_loader=train_loader, end_lr=10, num_iter=200)
lr_finder.plot()
# NOTE(review): per the torch-lr-finder docs, reset() puts the model and `opt`
# back into the state they had before the range test - confirm for the installed version.
lr_finder.reset()

In [None]:
# Training with per-epoch validation, best-checkpoint saving, a manual
# learning-rate halving and early stopping driven by validation accuracy.
n_total_steps = len(train_loader)
# Exact validation-set size. Multiplying the batch count by the batch size
# over-counts whenever the last batch is partial and deflates the accuracy.
val_samples = len(validation_loader.dataset)
total_epochs = CFG['epochs']
best_score = 0
no_improvement_rounds = 0


for epoch in range(total_epochs):
    #training
    model.train()
    bar = tqdm(train_loader, position=0)
    average_loss = 0.0
    total_samples = 0
    for images, labels in bar:

        images = images.to(CFG['device'])
        labels = labels.to(CFG['device'])

        #zeroing gradients
        optimizer.zero_grad()

        #forward
        y_preds = model(images)
        loss = criterion(y_preds, labels)

        #back propagation
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size
        # to make `average_loss / total_samples` a true per-sample average.
        # .item() yields a plain float, so no autograd history is retained.
        n_batch = labels.size(0)
        average_loss += loss.item() * n_batch
        total_samples += n_batch

        bar.set_description(f'epoch: {epoch + 1}/{total_epochs} avg_loss: {average_loss / total_samples:.6f}')

    #validation
    model.eval()
    n_correct = 0
    val_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images = images.to(CFG['device'])
            labels = labels.to(CFG['device'])

            #preds for validation
            val_preds = model(images)
            # `_` instead of `val`: avoids overwriting the validation DataFrame.
            _, idx = torch.max(val_preds, 1)
            n_correct += (idx == labels).sum().item()

            n_batch = labels.size(0)
            val_loss += criterion(val_preds, labels).item() * n_batch
            total_samples += n_batch

    acc = 100.0 * n_correct / val_samples
    if acc > best_score:
        best_score = acc
        print(f'Val_loss: {val_loss / total_samples:.6f}, New best score reached! best score is {acc:.3f}, saving model')
        torch.save(model.state_dict(), 'best_model.pt')
        no_improvement_rounds = 0
    else:
        print(f'Val_loss: {val_loss / total_samples:.6f}, Accuracy did not improve! {acc:.3f}')
        no_improvement_rounds += 1

    # Halve the learning rate once, on the second consecutive epoch without improvement.
    if no_improvement_rounds == 2:
        new_lr = 0
        for g in optimizer.param_groups:
            new_lr = g['lr'] * 0.5
            g['lr'] = new_lr
        print(f'reducing learning rate to {new_lr}')

    # Give up after five consecutive epochs without improvement.
    if no_improvement_rounds == 5:
        print('Early stopping')
        break

In [None]:
#submission

#loaders
test_dataset = dataset(test_data, None, is_test=True)
# Predict in regular mini-batches: pushing the whole test set through the
# network as one batch can exhaust device memory.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CFG['batch_size'],
    shuffle=False,
    num_workers=CFG['num_workers'],
)

# Restore the best checkpoint onto the configured device and switch to
# inference mode explicitly rather than relying on the state training left behind.
model.load_state_dict(torch.load('best_model.pt', map_location=CFG['device']))
model.eval()

batch_preds = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(CFG['device'])
        # `_` instead of `val`: avoids overwriting the validation DataFrame.
        _, idx = torch.max(model(images), 1)
        batch_preds.append(idx.cpu().numpy().flatten())

# Flat 1-D array of predicted class indices in test-set order (the loader is
# not shuffled). Batches may differ in length, hence concatenate.
test_preds = np.concatenate(batch_preds)

In [None]:
# Flatten the collected predictions into one vector and store them in the
# Label column of the sample-submission table.
test_preds = np.ravel(test_preds)
submission = pd.read_csv('../input/digit-recognizer/sample_submission.csv').assign(Label=test_preds)

In [None]:
# Write the submission table to submission.csv, omitting the DataFrame index.
submission.to_csv('submission.csv', index=False)