In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# !pip install torchsummary

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torch.optim.lr_scheduler import  ReduceLROnPlateau
import torchvision.transforms as transforms
import seaborn as sns
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import copy
#from torchsummary import summary # model summary

# Load and Visualize Dataset

In [None]:
# Load the Kaggle "digit-recognizer" CSV files into DataFrames.
# `train` contains a 'label' column next to the pixel columns; `test` is later
# passed to the inference dataset as features only.
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

In [None]:
train.head()

In [None]:
train.shape

In [None]:
# Separate the pixel features (X) from the target column (y).
# Note: this is NOT the train/validation split; that is done further down
# with train_test_split.
X = train.drop(['label'], axis=1)
y = train['label']

In [None]:
# Bar chart of how many training rows exist for each label value.
fig, ax = plt.subplots(figsize=(7,5))
sns.countplot(x = y, ax = ax)
ax.set_title('Train count labels')


The graph shows that the training data is roughly balanced across the digit classes. Next, we look at one example image per digit.

In [None]:
# One representative sample per class: the first row label at which each digit occurs.
digits = y.unique()
idx_digits = [y[y == digit].index[0] for digit in digits]


def plotImages(idx_images):
    """Draw the images stored at the given row labels of ``X`` on a 2x5 grid.

    Each panel is titled with the entry of the notebook-level ``digits`` array
    at the same position as the image in ``idx_images``.
    """
    _, axes = plt.subplots(2, 5, figsize=(15, 10))

    for position, row_label in enumerate(idx_images):
        # Fill the grid row by row: positions 0-4 go to the first row, 5-9 to the second.
        grid_row, grid_col = divmod(position, 5)
        pixels = X.loc[row_label].values.reshape((28, 28))
        panel = axes[grid_row, grid_col]
        panel.imshow(pixels, cmap='gray')
        panel.set_title(digits[position])


plotImages(idx_digits)

# Class Config

In [None]:
class Config():
    """Central place for the notebook's network-architecture and training settings."""

    # NN architecture parameters
    convKernelSize = 3   # kernel size used by both convolution layers
    poolKernelSize = 2   # kernel size of the max-pooling layer
    numKernels_1 = 64    # output channels of the first convolution
    numKernels_2 = 32    # output channels of the second convolution
    hiddenLayer = 256    # width of the fully connected hidden layer(s)
    features = 10        # size of the network's output layer

    # Hyperparameters
    batchSize = 1024
    epochs = 40
    criterion = nn.CrossEntropyLoss()
    learning_rate = 1e-3
    plateau_factor = 0.5     # `factor` passed to ReduceLROnPlateau
    plateau_patience = 3     # `patience` passed to ReduceLROnPlateau

    random_state = 42    # seed for train_test_split and torch.manual_seed
    testSize = 0.25      # fraction of the labelled data held out for validation

    probability = 0.6    # probability `p` used by most augmentation transforms
   

# Class Dataset
Reference: https://www.kaggle.com/hinepo/pytorch-tutorial-cv-99-67-lb-99-26#Dataset-class

In [None]:
### for training and validation
class DigitDataset(Dataset):
    """Labelled digit images for training and validation.

    Args:
        df: DataFrame holding one flattened 28x28 image per row plus the target column.
        X_col: Names of the pixel columns.
        y_col: Name of the target column.
        augmentations: Optional Albumentations-style callable. It is invoked as
            ``augmentations(image=array)`` and must return a mapping with an
            ``'image'`` entry.

    Each item is a pair ``(image, label)``: a float tensor of shape (1, 28, 28)
    holding the pixel values divided by 255, and a float tensor of shape (1,)
    holding the (unscaled) label.
    """

    def __init__(self, df, X_col, y_col, augmentations = None):
        # Scale only the features, never the target. float32 is what the model
        # consumes and what Albumentations accepts for floating-point images.
        self.features = (df[X_col].values / 255).astype(np.float32)
        self.targets = df[y_col].values.reshape((-1, 1))
        self.augmentations = augmentations

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        pixels = self.features[idx]
        label = self.targets[idx]

        if self.augmentations is not None:
            # Albumentations works on channel-last (H, W, C) arrays. Passing the
            # channel-first (1, 28, 28) layout makes it treat the digit as a
            # 1x28 image with 28 channels, so hand it (28, 28, 1) instead.
            augmented = self.augmentations(image=pixels.reshape((28, 28, 1)))['image']
            # Some transforms drop the singleton channel axis; reshape restores
            # the channel-first layout the network expects in either case.
            image = np.asarray(augmented, dtype=np.float32).reshape((1, 28, 28))
        else:
            image = pixels.reshape((1, 28, 28))

        return torch.FloatTensor(image), torch.FloatTensor(label)


### for inference
class DigitInferenceDataset(Dataset):
    """Unlabelled digit images used at inference time.

    Args:
        df: DataFrame whose rows are flattened 28x28 images (features only).
        augmentations: Stored on the instance; ``__getitem__`` does not apply it.

    Each item is a float tensor of shape (1, 28, 28) with pixel values divided by 255.
    """

    def __init__(self, df, augmentations = None):
        pixel_matrix = df.values
        self.features = pixel_matrix / 255  # greyscale scaling of the features
        self.augmentations = augmentations

    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, idx):
        flat_pixels = self.features[idx]
        channel_first = flat_pixels.reshape((1, 28, 28))
        return torch.FloatTensor(channel_first)

# Data Augmentation
With the Albumentations library

Tutorial for image classification: https://albumentations.ai/docs/examples/pytorch_classification/ 

Documentation: https://albumentations.ai/docs/

Reference: https://www.kaggle.com/hinepo/pytorch-tutorial-cv-99-67-lb-99-26#Dataset-class

In [None]:
# Augmentation pipeline applied to training images. Every transform carries its
# own probability `p`; most use Config.probability, MotionBlur uses a fixed 0.2.
transform_train = A.Compose([

#                   A.Rotate(limit=60, p=1), 
                  A.ShiftScaleRotate(rotate_limit=45, p=Config.probability),
                  A.Downscale(scale_min=0.7, scale_max=0.7, p=Config.probability),
                  A.MotionBlur(p=0.2),
                  # NOTE(review): scale=0.5 is a single fixed value, not a range - confirm that
                  # always shrinking to half size is intended.
                  A.Affine(scale=0.5,p=Config.probability), 
                  A.Affine(rotate=(-10,10),p=Config.probability), 

])


# Pipeline for validation images: it contains no transforms.
transform_val = A.Compose([


])



# CNN -  Model

In [None]:
class NetCnn(nn.Module):
    """Two-block convolutional classifier for single-channel square images.

    Layout::

        [conv -> ReLU -> batch-norm -> max-pool] x 2 -> flatten
        -> linear -> batch-norm -> ReLU -> linear (logits)

    All layer sizes are read from ``Config``.

    Args:
        input_size: Height/width of the (square) input image. Defaults to 28.

    ``forward`` takes a tensor of shape (batch, 1, input_size, input_size) and
    returns raw logits of shape (batch, Config.features). No softmax is applied,
    which is what ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, input_size=28):
        super().__init__()
        # Convolutional layers (no padding, stride 1)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=Config.numKernels_1, kernel_size=Config.convKernelSize, padding=0)
        self.conv2 = nn.Conv2d(in_channels=Config.numKernels_1, out_channels=Config.numKernels_2, kernel_size=Config.convKernelSize, padding=0)

        # Batch normalization
        self.bnorm1 = nn.BatchNorm2d(num_features=Config.numKernels_1)
        self.bnorm2 = nn.BatchNorm2d(num_features=Config.numKernels_2)
        self.bnorm3 = nn.BatchNorm1d(num_features=Config.hiddenLayer)

        # Pooling and flattening
        self.pool = nn.MaxPool2d(kernel_size=Config.poolKernelSize)
        self.flatten = nn.Flatten()

        # Derive the flattened feature size from the configuration instead of
        # hard-coding it. With the defaults (28x28 input, 3x3 kernels, 2x2 pooling):
        #   conv1: 28 - 3 + 1 = 26   -> pool: 26 // 2 = 13
        #   conv2: 13 - 3 + 1 = 11   -> pool: 11 // 2 = 5
        #   flattened size = 5 * 5 * numKernels_2
        side = input_size
        for _ in range(2):
            side = self._side_after_block(side, Config.convKernelSize, Config.poolKernelSize)
        flat_features = Config.numKernels_2 * side * side

        # Dense head: flat_features -> hiddenLayer -> features
        self.fc1 = nn.Linear(in_features=flat_features, out_features=Config.hiddenLayer)
        # fc2 and dropout are not used by forward(); they are kept so the module's
        # attributes and state_dict keys stay the same as before.
        self.fc2 = nn.Linear(Config.hiddenLayer, Config.hiddenLayer)
        self.out = nn.Linear(Config.hiddenLayer, Config.features)

        self.dropout = nn.Dropout(p=0.25)

        # Activation function shared by every block
        self.activation = nn.ReLU()

    @staticmethod
    def _side_after_block(side, conv_kernel, pool_kernel):
        """Spatial side length after an unpadded stride-1 conv followed by max-pooling."""
        return (side - conv_kernel + 1) // pool_kernel

    def forward(self, x):
        # Convolutional block 1
        out = self.conv1(x)
        out = self.activation(out)
        out = self.bnorm1(out)
        out = self.pool(out)

        # Convolutional block 2
        out = self.conv2(out)
        out = self.activation(out)
        out = self.bnorm2(out)
        out = self.pool(out)

        out = self.flatten(out)

        # Dense hidden layer. The ReLU is required: without a non-linearity here,
        # fc1 -> batch-norm -> out is just a single affine map.
        out = self.fc1(out)
        out = self.bnorm3(out)
        out = self.activation(out)

        # Output logits
        out = self.out(out)

        return out

# Split Data

In [None]:
# Report the size of the full labelled set before splitting.
print(f'Complet Data {train.shape}')

# Hold out Config.testSize of the labelled rows for validation; random_state
# fixes the random split so it is the same on every run.
train_df, val_df = train_test_split(train, test_size = Config.testSize, random_state=Config.random_state)
    
print('\nSplit Data')
print('\nTrain',train_df.shape)  
print('Val',val_df.shape) 

In [None]:
train_df.head()

## Visualizing augmentation

In [None]:
y_col = "label"
X_col = [c for c in train_df.columns if c != 'label']

# Two views of the same training rows: untouched and with the training augmentations.
train_dataset_not_aug = DigitDataset(train_df, X_col, y_col, augmentations = None)
train_dataset = DigitDataset(train_df, X_col, y_col, augmentations = transform_train)

print('Original Lenght ', len(train_dataset_not_aug))
print('Augumentation Lenght ', len(train_dataset), '\n')

# Range of dataset positions to display, one figure row per sample.
start = 10000
end = start + 10
fi, ax = plt.subplots(end - start, 2, figsize = (10, (end - start) * 3))

for row, sample_idx in enumerate(range(start, end)):
    # Use the loop's own sample index so each row shows a different digit
    # (previously every row displayed sample `start + 1`).
    image, label = train_dataset_not_aug[sample_idx]
    image_aug, label_aug = train_dataset[sample_idx]

    # Left column: original image (drop the leading channel axis for imshow)
    ax[row, 0].imshow(image.squeeze(0).numpy(), cmap='gray')
    ax[row, 0].set_title(f'Label: {label.item()}', fontsize=14)
    ax[row, 0].axis('off')

    # Right column: augmented version of the same sample
    ax[row, 1].imshow(image_aug.squeeze(0).numpy(), cmap='gray')
    ax[row, 1].set_title(f'Label: {label_aug.item()}', fontsize=14)
    ax[row, 1].axis('off')

plt.suptitle('Analisys Augumentations', fontsize=20)
plt.show()
        

# Train

In [None]:
torch.manual_seed(Config.random_state)

# Batch size and number of epochs
batch_size = Config.batchSize
num_epochs = Config.epochs

# PyTorch train and validation datasets
train_dataset = DigitDataset(train_df, X_col, y_col, augmentations = transform_train)
val_dataset = DigitDataset(val_df,  X_col, y_col,  augmentations = transform_val)

# Data loaders. Only the training data needs shuffling; validation metrics do
# not depend on sample order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = Config.batchSize, shuffle = True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size = Config.batchSize, shuffle = False)

# Create CNN
model = NetCnn()

# `is_available` must be CALLED: the bare function object is always truthy,
# which selected CUDA even on CPU-only machines.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Cross-entropy loss
criterion = Config.criterion

# RMSprop optimizer
learning_rate = Config.learning_rate
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

# The scheduler is stepped with the validation LOSS, which should decrease,
# so it has to watch for a plateau in 'min' mode.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor = Config.plateau_factor, patience = Config.plateau_patience, verbose = True)

## Train and Validation Functions

In [None]:
def train_model(train_loader,device):
    """Run one training epoch and return the mean per-batch training loss.

    Uses the notebook-level ``model``, ``optimizer`` and ``criterion``.

    Args:
        train_loader: Iterable yielding ``(data, labels)`` batches; labels have
            one entry per sample and are cast to ``long`` class indices.
        device: Device the model and every batch are moved to.

    Returns:
        A 0-dim CPU tensor with the loss averaged over all batches of the epoch.
        It is a tensor (not a float) so callers can keep chaining
        ``.cpu().detach().numpy()`` on it. Returns 0 for an empty loader.
    """
    model.to(device)
    model.train()

    train_loss = 0.0
    for data, labels in train_loader:
        data = data.to(device)
        # reshape(-1) instead of squeeze(): squeeze() would turn a batch holding
        # a single sample into a 0-dim tensor, which the loss rejects.
        labels = labels.to(device).long().reshape(-1)

        # Clear the gradients
        optimizer.zero_grad()
        # Forward pass
        logits = model(data)
        # Find the loss
        loss = criterion(logits, labels)
        # Calculate gradients
        loss.backward()
        # Update weights
        optimizer.step()
        # Accumulate the batch loss
        train_loss += loss.item()

    # Average the ACCUMULATED loss; previously only the last batch's loss was used.
    loss_ep = torch.tensor(train_loss / max(len(train_loader), 1))

    return loss_ep

def val_model(val_loader,device):
    """Evaluate the notebook-level ``model`` on ``val_loader``.

    Uses the notebook-level ``model`` and ``criterion``; no gradients are computed.

    Args:
        val_loader: Iterable yielding ``(data, labels)`` batches; labels have
            one entry per sample and are cast to ``long`` class indices.
        device: Device every batch is moved to (the model must already be there).

    Returns:
        ``(accuracy, loss)`` where ``accuracy`` is the fraction of correctly
        classified samples (float) and ``loss`` is a 0-dim CPU tensor with the
        loss averaged over all batches. The loss is a tensor so callers can keep
        chaining ``.cpu().detach().numpy()`` on it. Both are 0 for an empty loader.
    """
    valid_loss = 0.0
    model.eval()  # switch batch-norm/dropout layers to inference behaviour
    total = 0
    correct = 0
    with torch.no_grad():
        for data, labels in val_loader:
            data = data.to(device)
            # reshape(-1) instead of squeeze(): keeps a 1-D label vector even
            # when a batch contains a single sample.
            labels = labels.to(device).long().reshape(-1)

            # Forward pass
            logits = model(data)
            # Accumulate the batch loss
            valid_loss += criterion(logits, labels).item()

            # Predicted class = index of the largest logit
            predicted = logits.argmax(dim=1)
            total += len(labels)
            correct += (predicted == labels).sum().item()

    acc_ep = correct / total if total else 0.0
    # Average the ACCUMULATED loss; previously only the last batch's loss was used.
    loss_ep = torch.tensor(valid_loss / max(len(val_loader), 1))

    return acc_ep,loss_ep
    
    

## Model Train

In [None]:
# Per-epoch history, consumed by the plotting cell.
loss_train_list, loss_val_list, acc_list = [], [], []
min_acc = 0          # best validation accuracy seen so far
min_loss = np.inf

for epoch in range(num_epochs):
    # One pass over the training data, then one over the validation data.
    loss_train = train_model(train_loader, device).detach().cpu().numpy()
    accuracy, loss_val = val_model(val_loader, device)
    loss_val = loss_val.detach().cpu().numpy()

    # Record this epoch's metrics.
    acc_list.append(accuracy)
    loss_train_list.append(loss_train)
    loss_val_list.append(loss_val)

    # Let the plateau scheduler react to the validation loss.
    scheduler.step(loss_val)

    # Keep a snapshot of the model whenever validation accuracy improves.
    if accuracy > min_acc:
        min_acc = accuracy
        best_model = copy.deepcopy(model)

    log_epoch = 'Epoch {} \t Training Loss: {:.5f} \t Validation Loss: {:.5f} \t Accuracy: {:.5f}'.format(
        epoch + 1, loss_train, loss_val, accuracy
    )
    print(log_epoch)

## Plot Results

In [None]:
fig, ax = plt.subplots(2,1,figsize=(20,15))

# 0-based epoch positions of the best validation accuracy / lowest validation loss
epoch_max_acc = np.argmax(acc_list)
epoch_min_loss_val = np.argmin(loss_val_list)

# Top panel: training vs. validation loss
ax[0].plot(loss_train_list, label='Loss Train', linewidth = 2.5)
ax[0].plot(loss_val_list, label='Loss Val',linewidth = 2.5)
ax[0].axvline(x = epoch_min_loss_val, color = 'r', linestyle = 'dashed', label = 'Best val Loss')
ax[0].legend(fontsize=14)
ax[0].set_title('Loss', fontsize=16)
ax[0].set_xlabel('Epoch')

# Bottom panel: validation accuracy. Plot the FULL history: dropping the first
# epoch shifted the curve by one and misaligned it with the best-epoch marker.
ax[1].set_title('Accuracy',fontsize=16,)
ax[1].plot(acc_list, '--b', linewidth = 2.5, label = 'Val Acc')
ax[1].axvline(x = epoch_max_acc, color = 'r', linestyle = 'dashed', label = 'Best val Acc')
ax[1].set_xlabel('Epoch')
ax[1].legend()
plt.show()

# Submission

In [None]:
test.head()

In [None]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Args:
        x: Array-like of shape (n_rows, n_classes) containing scores/logits.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtracting each row's maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing to inf (and the ratio from becoming nan).
    shifted = x - x.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)

In [None]:
# instantiate Inference Dataset class (create inference Dataset) from the test DataFrame
inference_dataset = DigitInferenceDataset(test, augmentations=None)

# create Inference DataLoader object from Dataset class object
# shuffle = False keeps the batches in the row order of `test`, so the predictions
# collected later line up with the rows of the submission file.
inference_dataloader = DataLoader(inference_dataset,
                                  batch_size = Config.batchSize,
                                  shuffle = False)

In [None]:
list_predictions = list()

# Predict with `best_model`, the snapshot taken at the epoch with the highest
# validation accuracy, rather than with the weights left after the last epoch.
best_model.eval()
with torch.no_grad():
    for data in inference_dataloader:
        # Forward pass
        logits = best_model(data.to(device))

        # The predicted class is the index of the largest logit. Softmax is
        # monotonic, so applying it first would not change the argmax.
        predicted = logits.argmax(dim=1).cpu().numpy()

        list_predictions.append(predicted)

In [None]:
list_predictions_final = np.concatenate(list_predictions, axis = 0)

In [None]:
# Load the sample submission file and replace its 'Label' column with the
# concatenated model predictions, then preview the first rows.
submission = pd.read_csv('/kaggle/input/digit-recognizer/sample_submission.csv')
submission['Label'] = list_predictions_final
submission.head(10)

In [None]:
submission.to_csv('./submission.csv',index = False)

In [None]:
pd.read_csv('./submission.csv')