In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torchvision
import torch.nn.functional as F
from torch import optim, nn
from torchvision import transforms, models, datasets

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tqdm import tqdm
import shutil 

%matplotlib inline

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)
device = torch.device('cuda' if cuda_ok else 'cpu')
print(device)

### Load data

In [None]:
# Read the competition's train and test tables from the same input folder.
path = "/kaggle/input/digit-recognizer/"
train_df, test_df = (pd.read_csv(path + csv_name) for csv_name in ('train.csv', 'test.csv'))
train_df.shape, test_df.shape

Convert the pixel data to float32

In [None]:
# Labels come from the 'label' column; every column after the first is treated as pixel data.
pixel_dtype = 'float32'
train_classes = train_df['label'].values
train_img = train_df.iloc[:, 1:].values.astype(pixel_dtype)
# The test table is used whole: all of its columns are taken as pixels.
test_img = test_df.values.astype(pixel_dtype)
train_img.shape

Split the data: 80% for training and 20% for validation

In [None]:
# Stratified 80/20 split with a fixed seed; X_test / y_test are used as the validation set below.
split_parts = train_test_split(
    train_img,
    train_classes,
    test_size=0.2,
    stratify=train_classes,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts
X_train.shape, y_train.shape, X_test.shape, test_img.shape

In [None]:
# Turn each flat pixel row into a single-channel 28x28 image: (N, 1, 28, 28).
# The sample count is inferred with -1 instead of being hard-coded, so this cell
# keeps working if the split ratio or the input files change.
train = np.array(X_train).reshape(-1, 1, 28, 28)
val = X_test.reshape(-1, 1, 28, 28)
test = test_img.reshape(-1, 1, 28, 28)

train.shape

### Data visualization

In [None]:
# Show the first three training images in the top row of a 3x3 grid, titled with their labels.
for position in (1, 2, 3):
    sample = position - 1
    plt.subplot(330 + position)
    plt.imshow(train[sample].squeeze(), cmap=plt.get_cmap('gray'))
    plt.title(y_train[sample])

In [None]:
# Images: convert each split to a tensor and divide by 255
# (presumably mapping 0-255 pixel values into [0, 1] -- confirm against the data).
x_train_tensor, x_test_tensor, test_tensor = (
    torch.tensor(images) / 255 for images in (train, val, test)
)

# Labels exist only for the train and validation splits.
y_train_tensor, y_test_tensor = torch.tensor(y_train), torch.tensor(y_test)

# Pair images with labels; the test split stays a bare tensor.
train_tensor = torch.utils.data.TensorDataset(x_train_tensor, y_train_tensor)
val_tensor = torch.utils.data.TensorDataset(x_test_tensor, y_test_tensor)

In [None]:
# Only the training loader is shuffled. Validation and test batches are read in a
# fixed order so evaluation is deterministic and test predictions stay aligned
# with the row order of the test set.
train_loader = torch.utils.data.DataLoader(train_tensor, batch_size=64, num_workers=4, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_tensor, batch_size=64, num_workers=4, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_tensor, batch_size=64, shuffle=False)

### ResNet model

In [None]:
class Neural(nn.Module):
    """Single-channel image classifier built around the layers of an existing network.

    The wrapped network's first and last children are discarded: a new
    1-channel 7x7 convolution replaces the first one, and a small fully
    connected head replaces the last one. The head expects 512 features, so the
    retained layers must end with 512 channels (presumably the case for
    torchvision's ResNet-18 -- confirm when swapping the backbone).

    The forward pass returns raw logits. No softmax is applied here because
    `nn.CrossEntropyLoss` applies log-softmax internally; applying softmax twice
    flattens the gradients. The argmax of the logits is the predicted class;
    call `softmax(dim=1)` on the output if probabilities are needed.

    Args:
        model: network whose children (all but the first and last) are reused.
        output: number of classes, i.e. the size of the final linear layer.
    """

    def __init__(self, model, output):
        super(Neural, self).__init__()
        self.model = nn.Sequential(
                # New stem so that 1-channel input is accepted.
                nn.Conv2d(1, 64, kernel_size = (7,7), stride = (2, 2), padding = (3, 3), bias = False),
                # Everything from the wrapped network except its first and last child.
                *(list(model.children())[1:-1]),
                nn.AdaptiveMaxPool2d(output_size = 1),
                nn.Flatten(),
                nn.BatchNorm1d(512, eps=1e-05, momentum=0.1, track_running_stats = True, affine = True),
                nn.Dropout(p = 0.25, inplace = True),
                )
        self.fc = nn.Sequential(
                nn.Linear(in_features=512, out_features = 128, bias=True),
                nn.BatchNorm1d(128, eps=1e-05, momentum=0.1, track_running_stats = True, affine = True),
                nn.Dropout(p=0.3, inplace = True),
                nn.ReLU(inplace = True),
                # Final layer emits logits; the loss function handles the softmax.
                nn.Linear(128, output, bias = True))

    def forward(self, x):
        """Return class logits of shape (batch, output) for a batch of 1-channel images."""
        x = self.model(x)
        x = self.fc(x)

        return x
    
# Wrap a pretrained ResNet-18 in the 10-class classifier defined above.
resnet_model = models.resnet18(pretrained=True)
model = Neural(model=resnet_model, output=10)
model

In [None]:
# Move the model to the target device before building the optimizer around its parameters.
model.to(device);

# NOTE: OneCycleLR drives the learning rate from `max_lr`, so Adam's `lr` here is
# only a placeholder (see the OneCycleLR documentation).
opt = optim.Adam(model.parameters(), lr=0.02)
loss_func = nn.CrossEntropyLoss()

# The scheduler is stepped once per batch, so `steps_per_epoch` must be the number
# of batches (len(train_loader)), not the number of samples. Counting samples
# stretched the cycle ~64x (the batch size) and the LR never left early warm-up.
scheduler = optim.lr_scheduler.OneCycleLR(opt, max_lr=0.003, steps_per_epoch=len(train_loader), epochs=10)

In [None]:
%time
def train_model(models, loss, optimizer, train_data, val_data, scheduler=None, num_epoch=15, save_path="test.pth"):
    """Train `models`, validate after every epoch and checkpoint the best weights.

    Args:
        models: the network to train (an `nn.Module`). Batches are moved to the
            device of its first parameter, so move the model before calling.
        loss: loss callable taking (predictions, targets). It is assumed to
            return a per-batch mean (the default for `nn.CrossEntropyLoss`).
        optimizer: optimizer built over `models.parameters()`.
        train_data: DataLoader yielding (inputs, labels) training batches.
        val_data: DataLoader yielding (inputs, labels) validation batches.
        scheduler: optional LR scheduler, stepped once per training batch.
        num_epoch: number of passes over `train_data`.
        save_path: where the state dict of the best-validating model is written.

    Returns:
        Tuple `(models, train_loss, train_acc, val_acc, val_loss)`; the four
        lists hold one entry per epoch. Losses are per-sample means and
        accuracies are percentages.
    """
    # Follow the model's own device instead of relying on a notebook-level global.
    first_param = next(models.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_loss, train_acc, val_loss, val_acc = [], [], [], []
    best_val_acc = 0

    n_train = len(train_data.dataset)
    n_val = len(val_data.dataset)

    for epoch in tqdm(range(num_epoch)):

        # ---- training pass ----
        models.train()
        _train_loss, _train_acc = 0.0, 0

        for batch, (xb, yb) in enumerate(train_data):
            xb = xb.to(run_device)
            # reshape(-1) keeps a 1-D target even for a batch of one sample
            # (torch.squeeze would collapse it to a 0-dim tensor).
            yb = yb.to(run_device).reshape(-1).long()

            optimizer.zero_grad()
            pred = models(xb)
            _loss = loss(pred, yb)
            _loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()

            # Weight the batch-mean loss by the batch size so that dividing by the
            # dataset size below yields a true per-sample mean.
            _train_loss += _loss.item() * xb.size(0)
            _, pred_cls = torch.max(pred, dim=1)
            _train_acc += torch.sum(pred_cls == yb).item()

            if batch % 200 == 0:
                print(str(epoch) + "  loss:  " + str(_loss.item()))

        accuracy = 100 * _train_acc / n_train
        print('Best accuracy train epoch: ' + str(epoch) + ' ' + str(accuracy))
        train_loss.append(_train_loss / n_train)
        train_acc.append(accuracy)

        # ---- validation pass ----
        models.eval()
        with torch.no_grad():
            _val_loss, _val_acc = 0.0, 0
            for val_x, val_y in val_data:
                val_x = val_x.to(run_device)
                val_y = val_y.to(run_device).reshape(-1).long()
                pred = models(val_x)
                _val_loss += loss(pred, val_y).item() * val_x.size(0)
                _, preds = torch.max(pred, 1)
                # Accumulate a plain int so an empty loader does not break `.item()` later.
                _val_acc += torch.sum(preds == val_y).item()

            val_accuracy = 100 * _val_acc / n_val
            print("Best accuracy val epoch " + str(epoch) + ' = ' + str(val_accuracy))
            if val_accuracy >= best_val_acc:
                print('Save best model')
                # Save the model being trained (the `models` argument), not a global.
                torch.save(models.state_dict(), save_path)
                best_val_acc = val_accuracy
            val_loss.append(_val_loss / n_val)
            val_acc.append(val_accuracy)

    return models, train_loss, train_acc, val_acc, val_loss

# First training stage: 10 epochs with the one-cycle schedule configured above.
first_run = train_model(
    model,
    loss_func,
    opt,
    train_loader,
    val_loader,
    scheduler=scheduler,
    num_epoch=10,
)
new_model, train_loss, train_acc, val_acc, val_loss = first_run

In [None]:
def show_loss_and_accuracy(train_loss, val_loss, train_acc, val_acc, title):
    """Plot per-epoch loss and accuracy curves for training and validation.

    Args:
        train_loss: sequence of training losses, one per epoch.
        val_loss: sequence of validation losses, same length as `train_loss`.
        train_acc: sequence of training accuracies, one per epoch.
        val_acc: sequence of validation accuracies, one per epoch.
        title: overall figure title (skipped when empty or None).
    """
    fig, (ax1, ax2) = plt.subplots(figsize=(20, 12), nrows=2)
    if title:
        # The argument was previously accepted but never used.
        fig.suptitle(title)

    x = list(range(len(train_loss)))

    ax1.plot(x, train_loss,  label='train_loss')
    ax1.plot(x, val_loss, label='val_loss')
    ax1.set_title("loss in train and val")
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('loss')
    ax1.legend()

    ax2.plot(x, train_acc, label='train_accuracy')
    ax2.plot(x, val_acc, label='val_accuracy')
    ax2.set_title("Accuracy in train and val")
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    # Without a legend the two accuracy curves cannot be told apart.
    ax2.legend()

    # Figure.show() takes only a `warn` flag, so passing the axes to it was a misuse;
    # plt.show() renders the figure.
    plt.show()
        
        
# Curves for the most recent training run.
show_loss_and_accuracy(
    train_loss=train_loss,
    val_loss=val_loss,
    train_acc=train_acc,
    val_acc=val_acc,
    title='loss',
)

In [None]:
# Mark every parameter tensor from index 50 onward as trainable and report how
# many parameter tensors the model has in total.
all_params = list(new_model.parameters())
for param in all_params[50:]:
    param.requires_grad = True
count = len(all_params)

count

In [None]:
# Make sure the model is on the target device before building the optimizer.
new_model.to(device);

# Second stage with a lower peak learning rate. OneCycleLR drives the learning rate
# from `max_lr`, so Adam's `lr` is only a placeholder (see the OneCycleLR documentation).
opt = optim.Adam(new_model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

# The scheduler is stepped once per batch, so `steps_per_epoch` must be the number
# of batches (len(train_loader)), not the number of samples in the dataset.
scheduler = optim.lr_scheduler.OneCycleLR(opt, max_lr=0.0001, steps_per_epoch=len(train_loader), epochs=10)

In [None]:
# Second training stage: continue from the current weights for another 10 epochs.
second_run = train_model(
    new_model,
    loss_func,
    opt,
    train_loader,
    val_loader,
    scheduler=scheduler,
    num_epoch=10,
)
new_model, train_loss, train_acc, val_acc, val_loss = second_run

In [None]:
def predicts(model, data):
    """Run `model` over every batch in `data` and return the predicted class indices.

    The model is switched to eval mode and inference runs under
    `torch.no_grad()`, so no autograd graph is built. Batches are moved to the
    device of the model's first parameter (CPU if it has no parameters).

    Args:
        model: module whose output holds one score per class along dim 1.
        data: iterable of input batches (tensors), e.g. a DataLoader over a tensor.

    Returns:
        1-D CPU LongTensor with the argmax class index of every sample, in
        iteration order; an empty LongTensor when `data` yields no batches.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    batch_preds = []
    with torch.no_grad():
        for item in data:
            out = model(item.to(target_device))
            _, preds = torch.max(out, 1)
            batch_preds.append(preds.cpu())

    if not batch_preds:
        return torch.LongTensor()
    # Concatenate once at the end instead of re-allocating on every batch.
    return torch.cat(batch_preds, dim=0)

In [None]:
test_set_pred = predicts(new_model, test_loader)

subm_df = pd.read_csv('/kaggle/input/digit-recognizer/sample_submission.csv')

# Convert to a NumPy array before assigning: how pandas treats a raw torch tensor
# is version-dependent, and the column should hold plain integers, not tensor objects.
subm_df['Label'] = test_set_pred.numpy()
subm_df.head()

In [None]:
# Write the submission file to the working directory without the index column.
subm_df.to_csv(path_or_buf='submit_test.csv', index=False)