# Libraries

In [None]:
# source: https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055
!pip install timm

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import os

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch import nn, optim

import timm

from skimage import transform

%matplotlib inline

In [None]:
print('[INFO] Training Mode:')
train_on_gpu = torch.cuda.is_available()
# Bind `device` on every machine: it used to be assigned only in the GPU
# branch, so any later use on a CPU-only host would raise NameError.
device = torch.device('cuda' if train_on_gpu else 'cpu')
if not train_on_gpu:
    print('\t->  Training on CPU')
else:
    print('\t-> Training on GPU')

# Models

In [None]:
# Report how many architectures timm exposes, and how many of them are
# returned when filtering with pretrained=True.
all_model_names = timm.list_models()
pretrained_model_names = timm.list_models(pretrained=True)
print('[INFO] Total Models:', len(all_model_names))
print('[INFO] Total Models pre-trained:', len(pretrained_model_names))
# print('[INFO] List Models pre-trained')
# timm.list_models(pretrained=True)

In [None]:
# Create the timm 'resnet50d' model with pretrained weights, configured for
# single-channel inputs and a 10-way output.
model_pretrained = timm.create_model('resnet50d', 
                                     pretrained=True,       # model pre trained
                                     in_chans=1,            # number of channel
                                     num_classes =10,       # number of class
                                     global_pool = 'max')   # pooling

# Shape smoke test on a random batch. It runs in eval mode and without
# autograd so that random noise neither updates the BatchNorm running
# statistics of the pretrained weights nor builds a throw-away graph.
x = torch.randn(1, 1, 224, 224)
model_pretrained.eval()
with torch.no_grad():
    output_shape = model_pretrained(x).shape
model_pretrained.train()  # restore the mode a freshly created module is in
print(f'[INFO] Final dimension: {output_shape}')

print('[INFO] Config')
model_pretrained

In [None]:
# Display the `default_cfg` attribute attached to the timm model
# (rendered as the cell output because it is the last expression).
print('[INFO] Model from TIMM')
model_pretrained.default_cfg

# Analysis

In [None]:
# Read the training CSV into a DataFrame, report its shape and preview the
# first rows as the cell output.
train_csv_path = "../input/digit-recognizer/train.csv"
train = pd.read_csv(train_csv_path)
print('[INFO] Train shape:', train.shape)
print('[INFO] DataFrame:')
train.head()

In [None]:
# Read the test CSV into a DataFrame, report its shape and preview the
# first rows as the cell output.
test_csv_path = "../input/digit-recognizer/test.csv"
test = pd.read_csv(test_csv_path)
print('[INFO] Test shape:', test.shape)
print('[INFO] DataFrame:')
test.head()

# Preprocessing

In [None]:
# Reshape the flat pixel columns into (N, 1, 28, 28) float32 arrays; the
# 'label' column is split off as the training target.
IMAGE_SHAPE = (-1, 1, 28, 28)

train_pixels = train.drop('label', axis=1).values
X_train = train_pixels.reshape(IMAGE_SHAPE).astype('float32')
y_train = train.label.values.astype(np.float32)
print('[INFO] Train shape:', X_train.shape)

test_pixels = test.values
X_test = test_pixels.reshape(IMAGE_SHAPE).astype('float32')
print('\n[INFO] Test shape:', X_test.shape)

In [None]:
print('[INFO] Normalization')
# Digit Recognizer pixels are integers in 0..255, so 255 is the divisor that
# maps them onto [0, 1]; the previous value (225) was a typo that left the
# brightest pixels above 1.0.
# NOTE: this cell rescales in place, so run it only once per kernel session.
MAX_PIXEL_VALUE = 255.0
X_train = X_train / MAX_PIXEL_VALUE
X_test = X_test / MAX_PIXEL_VALUE

In [None]:
# Hold out 20% of the training data for validation, stratified on the label
# and with a fixed random_state.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

print('[INFO] Split data:')
split_arrays = {
    'X_train': X_train,
    'X_val': X_val,
    'y_train': y_train,
    'y_val': y_val,
}
for split_name, split_array in split_arrays.items():
    print(f'\t-> {split_name}: {split_array.shape} | {split_array.dtype}')

In [None]:
# Wrap the NumPy splits as torch tensors; the label arrays are cast to
# 64-bit integers.
featuresTrain = torch.from_numpy(X_train)
targetsTrain = torch.from_numpy(y_train).long()

featuresVal = torch.from_numpy(X_val)
targetsVal = torch.from_numpy(y_val).long()

featuresTest = torch.from_numpy(X_test)

print('[INFO] Shapes:')
named_tensors = {
    'featuresTrain': featuresTrain,
    'targetsTrain': targetsTrain,
    'featuresVal': featuresVal,
    'targetsVal': targetsVal,
    'featuresTest': featuresTest,
}
for tensor_name, tensor in named_tensors.items():
    print(f'\t-> {tensor_name}: {tensor.shape} | {tensor.dtype}')

# Modeling

In [None]:
print('[INFO] Build tensors')
# Pair features with targets so that a DataLoader yields (X, y) batches.
# NOTE: `train` and `test` are rebound here from the raw DataFrames to
# tensor containers; rerun the CSV-loading cells before rerunning any cell
# that expects the DataFrames.
train = torch.utils.data.TensorDataset(featuresTrain,targetsTrain)
val = torch.utils.data.TensorDataset(featuresVal,targetsVal)
# The test split has no targets, so it stays a plain tensor copy. Gradient
# tracking is left off: inference inputs never need gradients.
test = featuresTest.clone().detach()

In [None]:
# Data loader
print('[INFO] Build data loader')
BATCH_SIZE = 200

# Training batches are reshuffled every epoch so the optimiser does not see
# the samples in the same fixed order each time; the seeded generator keeps
# the shuffling reproducible across runs.
train_loader = torch.utils.data.DataLoader(train,
                                    batch_size = BATCH_SIZE, 
                                    shuffle = True,
                                    generator = torch.Generator().manual_seed(42))
# Validation and test keep their order (the test order must match the
# submission rows).
val_loader = torch.utils.data.DataLoader(val,
                                  batch_size = BATCH_SIZE, 
                                  shuffle = False)
test_loader = torch.utils.data.DataLoader(test,
                                   batch_size = BATCH_SIZE, 
                                   shuffle = False)

In [None]:
class Net(nn.Module):
    """Thin wrapper that exposes a backbone network as ``self.cnn``.

    Args:
        backbone: module to wrap. When omitted, the notebook-level
            ``model_pretrained`` is used, which is what ``Net()`` has
            always done. Passing a module explicitly makes the class usable
            without that global.
    """

    def __init__(self, backbone=None):
        super().__init__()

        # The attribute name `cnn` prefixes every state_dict key, so it must
        # stay stable for saved checkpoints to load.
        self.cnn = model_pretrained if backbone is None else backbone

    def forward(self, x):
        """Return the wrapped backbone's output for ``x``."""
        return self.cnn(x)

# Instantiate the wrapper network and print its layer structure.
model = Net()
print(model)

# Move the parameters to the GPU when CUDA was detected.
if train_on_gpu:
    model = model.cuda()

In [None]:
def Train(train_loader, model, criterion, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: network to train; switched to training mode here.
        criterion: loss callable invoked as ``criterion(output, y)``.
        optimizer: optimiser stepped once per batch.

    Returns:
        Tuple ``(train_acc, train_loss)``: the number of correct predictions
        divided by the dataset size, and the mean of the per-batch losses.
    """
    running_loss = 0
    correct = 0
    size = len(train_loader.dataset)
    num_batches = len(train_loader)

    # Follow the device the model actually lives on instead of trusting a
    # notebook-global flag, so batches and parameters can never disagree.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # switch to train mode
    model.train()

    for X, y in train_loader:
        X = X.to(device)
        y = y.to(device)

        # Clear the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        forward_pass = model(X)
        loss = criterion(forward_pass, y)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Count correct predictions in this batch
        y_pred = torch.argmax(forward_pass, dim=1)
        y_true = y.long().squeeze()

        correct += (y_pred == y_true).type(torch.float).sum().item()
        running_loss += loss.item()

    train_acc = correct / size              # epoch accuracy
    train_loss = running_loss / num_batches  # epoch loss (mean over batches)

    return train_acc, train_loss
  

In [None]:
def Validate(val_loader, model, criterion, optimizer):
    """Evaluate ``model`` on ``val_loader`` without updating it.

    Args:
        val_loader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: network to evaluate; switched to evaluation mode here.
        criterion: loss callable invoked as ``criterion(output, y)``.
        optimizer: unused; kept so existing call sites keep working.

    Returns:
        Tuple ``(val_acc, val_loss)``: the number of correct predictions
        divided by the dataset size, and the mean of the per-batch losses.
    """
    running_loss = 0
    correct = 0
    size = len(val_loader.dataset)
    num_batches = len(val_loader)

    # Follow the device the model actually lives on instead of trusting a
    # notebook-global flag.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # switch to evaluation mode
    model.eval()

    # No gradients are needed for evaluation; without this guard every batch
    # would build and keep an autograd graph for nothing.
    with torch.no_grad():
        for X, y in val_loader:
            X = X.to(device)
            y = y.to(device)

            # Forward pass and loss
            forward_pass = model(X)
            loss = criterion(forward_pass, y)

            # Count correct predictions in this batch
            y_pred = torch.argmax(forward_pass, dim=1)
            y_true = y.long().squeeze()

            correct += (y_pred == y_true).type(torch.float).sum().item()
            running_loss += loss.item()

    val_acc = correct / size              # epoch accuracy
    val_loss = running_loss / num_batches  # epoch loss (mean over batches)

    return val_acc, val_loss

In [None]:
# Loss, optimiser and plateau-based learning-rate schedule.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
# The scheduler's `verbose` flag is deprecated in recent PyTorch releases, so
# learning-rate reductions are reported explicitly inside the loop instead.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                 mode = 'min',
                                                 patience = 5,
                                                 factor = 0.8)
epochs = 150

val_loss_min = np.inf  # `np.Inf` was removed in NumPy 2.0
early_patience = 30    # epochs without improvement before stopping
epochs_no_improve = 0

# Per-epoch history consumed by the plotting cell.
hist_train_acc = []
hist_val_acc = []
hist_train_loss = []
hist_val_loss = []
hist_lr = []

best_model_epoch = 0
best_model_acc = 0
best_model_loss = 0

verbose = 10  # print metrics every `verbose` epochs (and on every improvement)

learning_decay = True

stopped_early = False

print('[INFO] Training...')
for epoch in range(1, epochs+1):
    
    train_acc, train_loss = Train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = Validate(val_loader, model, criterion, optimizer)
    
    if learning_decay:
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            print(f'[INFO] Epoch {epoch}: reducing learning rate from {lr_before:.4e} to {lr_after:.4e}')
    
    # One entry per epoch so the history stays aligned with the other lists.
    hist_lr.append(optimizer.param_groups[0]['lr'])
        
    hist_train_acc.append(train_acc)
    hist_val_acc.append(val_acc)
    hist_train_loss.append(train_loss)
    hist_val_loss.append(val_loss)
    
    if (epoch == 1) or (epoch % verbose == 0) or (val_loss < val_loss_min):
        print(f'\n#################### EPOCH {epoch} ####################')
        print('[INFO] Train:')
        print(f'\t-> Accuracy: {(100*train_acc):.4f} %') 
        print(f'\t-> Avg loss: {train_loss:.5f}')
 
        print('[INFO] Validation:')
        print(f'\t-> Accuracy: {(100*val_acc):.4f} %') 
        print(f'\t-> Avg loss: {val_loss:.5f}')
        
    if val_loss < val_loss_min:
        # New best validation loss: checkpoint the weights and reset patience.
        val_loss_min = val_loss
        epochs_no_improve = 0
        best_model_epoch = epoch
        best_model_acc = val_acc
        best_model_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pt')
        print('[INFO] Network improvement, saving current model')
    else:
        epochs_no_improve += 1
    
    if epoch > 5 and epochs_no_improve >= early_patience:
        stopped_early = True
        break

# Print the summary exactly once, whether training ran to completion or was
# cut short (previously both summaries were printed when early stopping
# happened to trigger on the very last epoch).
print('\n',' '*20 , '~ END ~')
if stopped_early:
    print('\n[WARNING] Early Stopping')
    print('[INFO] Best Model (validation):')
else:
    print('\n[INFO] Best Model (validation):')
print(f'\t-> Epoch: {best_model_epoch}')
print(f'\t-> Accuracy: {(100*best_model_acc):.4f} %') 
print(f'\t-> Avg loss: {best_model_loss:.5f}')
    

In [None]:
# # CNN model training
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters())

# epochs = 10
# valid_loss_min = np.Inf
# train_losses, valid_losses = [], []
# history_accuracy = []
# h_acc = []

# for e in range(1, epochs+1):
#     running_loss = 0
#     acc = 0

#     for images, labels in train_loader:
#         if train_on_gpu:
#             images, labels = images.cuda(), labels.cuda()
#         # Clear the gradients, do this because gradients are accumulated.
#         optimizer.zero_grad()
        
#         # Forward pass, get our log-probabilities.
#         ps = model(images)

#         # Calculate the loss with the logps and the labels.
#         loss = criterion(ps, labels)
        
#         # Turning loss back.
#         loss.backward()
        
#         # Take an update step and few the new weights.
#         optimizer.step()
        
#         running_loss += loss.item()
        
#         # Capturing the class more likely.
#         _, top_class = ps.topk(1, dim=1)
                
#         # Verifying the prediction with the labels provided.
#         equals = top_class == labels.view(*top_class.shape)
          
#         acc += torch.mean(equals.type(torch.FloatTensor))
        
#     else:
#         valid_loss = 0
#         accuracy = 0
        
#         # Turn off gradients for validation, saves memory and computations.
#         with torch.no_grad():
#             model.eval() # change the network to evaluation mode
#             for images, labels in val_loader:
#                 if train_on_gpu:
#                     images, labels = images.cuda(), labels.cuda()
#                 # Forward pass, get our log-probabilities.
#                 #log_ps = model(images)
#                 ps = model(images)
                
#                 # Calculating probabilities for each class.
#                 #ps = torch.exp(log_ps)
                
#                 # Capturing the class more likely.
#                 _, top_class = ps.topk(1, dim=1)
                
#                                 # Verifying the prediction with the labels provided.
#                 equals = top_class == labels.view(*top_class.shape)
                
#                 valid_loss += criterion(ps, labels).item()
#                 accuracy += torch.mean(equals.type(torch.FloatTensor))
                
#         model.train() # change the network to training mode
        
#         train_losses.append(running_loss/len(train_loader))
#         valid_losses.append(valid_loss/len(val_loader))
#         history_accuracy.append(accuracy/len(val_loader))
#         h_acc.append(acc/len(train_loader))
        
#         network_learned = valid_loss < valid_loss_min

#         if e == 1 or e % 5 == 0 or network_learned:
#             print(f"Epoch: {e}/{epochs}.. ",
#                   f"Training Loss: {running_loss/len(train_loader):.4f}.. ",
#                   f"Training Accuracy: {acc/len(train_loader):.4f}.. ",
#                   f"Validation Loss: {valid_loss/len(val_loader):.4f}.. ",
#                   f"Validation Accuracy: {accuracy/len(val_loader):.4f}")
            
#         if network_learned:
#             valid_loss_min = valid_loss
#             torch.save(model.state_dict(), 'model_mtl_mnist.pt')
#             print('Detected network improvement, saving current model')

In [None]:
# Plot the recorded history: accuracy, loss and learning rate per epoch,
# each with a dashed red line at the epoch of the best checkpoint.
e = list(range(1, epoch + 1))

fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20,5))
fig.suptitle('Metrics')

# (axis, [(series, legend label), ...], title, y-axis label)
panels = [
    (ax1, [(hist_train_acc, 'Train'), (hist_val_acc, 'Validation')],
     'Model Accuracy', 'Accuracy'),
    (ax2, [(hist_train_loss, 'Train'), (hist_val_loss, 'Validation')],
     'Model Loss', 'Loss'),
    (ax3, [(hist_lr, 'Learning Rate')],
     'Model Learning Rate', 'Learning Rate'),
]

for axis, curves, panel_title, y_label in panels:
    for series, legend_label in curves:
        axis.plot(e, series, label=legend_label)
    axis.axvline(x=best_model_epoch, color='red', linestyle='--')
    axis.set_title(panel_title)
    axis.set(xlabel='Epoch', ylabel=y_label)
    axis.legend()

plt.show()

In [None]:
# model.load_state_dict(torch.load('best_model.pt'))

# print(model)

# model = Net()
# print(model)

# if train_on_gpu:
#     model.cuda()

In [None]:
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        x: array-like of shape (rows, classes).

    Returns:
        Array of the same shape whose rows sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing to inf (and the ratio from becoming
    # NaN) when the scores are large.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

def Inference(test_loader, model, checkpoint_path='best_model.pt'):
    """Predict a class index for every sample yielded by ``test_loader``.

    The weights stored at ``checkpoint_path`` are loaded into ``model``
    (modifying it in place) before predicting.

    Args:
        test_loader: iterable yielding batches of input tensors (no targets).
        model: network to run; it is actually used now, where previously it
            was discarded and replaced by a freshly built ``Net()``.
        checkpoint_path: file holding the state dict to load. Defaults to the
            checkpoint written by the training loop.

    Returns:
        1-D NumPy integer array with one predicted class per sample, in
        loader order (empty when the loader yields nothing).
    """
    # Run on whatever device the model already lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # switch to evaluation mode
    model.eval()

    predictions = []
    with torch.no_grad():
        for X in test_loader:
            forward_pass = model(X.to(device))

            # Predictions: softmax is monotonic, so the arg-max of the raw
            # scores is already the most probable class.
            y_pred = torch.argmax(forward_pass, dim=1).cpu().numpy()
            predictions.append(y_pred)

    print('[INFO] Inferences')
    if not predictions:
        # np.concatenate rejects an empty list; return an empty result instead.
        return np.array([], dtype=np.int64)

    return np.concatenate(predictions, axis=0)

def Submission(inferences):
    """Write ``submission.csv`` from the predicted labels.

    Reads the sample submission file, replaces its 'Label' column with
    ``inferences``, saves the result without the index and prints the
    first ten rows.
    """
    sample_path = '../input/digit-recognizer/sample_submission.csv'
    frame = pd.read_csv(sample_path)
    frame['Label'] = inferences
    frame.to_csv("submission.csv", index=False)
    print('[INFO] Submission Complete')
    print(frame.head(10))
    


In [None]:
# Predict the test-set labels and write them to the submission file.
preditions = Inference(test_loader, model)
Submission(preditions)

In [None]:
# # def softmax(x):
# #     return np.exp(x)/np.sum(np.exp(x), axis=1)[:, None]

# submission = [['ImageId', 'Label']]
# model.load_state_dict(torch.load('best_model.pt'))

# with torch.no_grad():
#     model.eval()
#     image_id = 1

#     for images in test_loader:
#         if train_on_gpu:
#             image = images.cuda()
#         log_ps = model(image)
#         ps = torch.exp(log_ps)
#         top_p, top_class = ps.topk(1, dim=1)
        
#         for prediction in top_class:
#             submission.append([image_id, prediction.item()])
#             image_id += 1
            
# print(len(submission) - 1)

In [None]:
# import csv

# with open('submission.csv', 'w') as submissionFile:
#     writer = csv.writer(submissionFile)
#     writer.writerows(submission)
    
# print('[INFO] Submission Complete')