In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm # this is a bar for the outputs
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Digit Classifier with PyTorch using Transfer Learning and Data Augmentation

In [None]:
# path of the CSV file holding the drawings and their labels
csv_path = '../input/digits-mini-dataset-5500/drawings.csv'
dataset = pd.read_csv(csv_path)
# preview the first rows
dataset.head()

In [None]:
# select the feature columns (everything except the 'label' column)
data = dataset.drop(['label'], axis=1).values.astype('float32')
# select the labels by column name, so the result does not depend on
# where the 'label' column sits in the CSV
labels = dataset['label'].values.astype('float32')

data.shape, labels.shape

In [None]:
# a useful function to plot images
def plot_images(imgs, dims, figsize, title_size=22, preds=None):
    """Draw a grid of images, optionally titled with real/predicted labels.

    Args:
        imgs: iterable of images; each one is passed through `np.squeeze`
            before being handed to `plt.imshow`.
        dims: `(rows, cols)` of the subplot grid.
        figsize: figure size forwarded to `plt.figure`.
        title_size: font size of every subplot title.
        preds: optional sequence where `preds[i][0]` is the real label and
            `preds[i][1]` the predicted label of image `i`. When omitted
            (or empty) the images are titled `Image <n>`.
    """
    # `None` replaces the former mutable default, and the explicit length
    # check also works when `preds` is a numpy array (where `preds != []`
    # is an element-wise comparison)
    has_preds = preds is not None and len(preds) > 0

    plt.figure(figsize=figsize)
    for i, img in enumerate(imgs):
        if has_preds:
            real, pred = preds[i][0], preds[i][1]
            title = f'Real: {real}, Pred: {pred}'
            # change the color map if the prediction is wrong
            cmap = 'gray' if real == pred else 'magma'
        else:
            title = f'Image {i+1}'
            cmap = 'gray'
        # select the plot position (subplot indices start at 1)
        plt.subplot(dims[0], dims[1], i+1)
        # plot the image
        plt.imshow(np.squeeze(img), cmap=cmap)
        plt.axis('off')
        plt.title(title, fontsize=title_size)
    plt.show()

# Data Processing and Augmentation

In [None]:
from albumentations import Compose, Blur, GridDistortion

# define the transformations function
transform = Compose([
    Blur(always_apply=False, p=1.0, blur_limit=(3, 6)),
    GridDistortion(always_apply=False, p=1.0, num_steps=5, distort_limit=(-0.3, 0.3), interpolation=0, border_mode=0, value=(0, 0, 0), mask_value=None)
])

# select an image; albumentations reads arrays as (height, width[, channels]),
# so the image is passed as (28, 28) - a (1, 28, 28) array would be treated
# as a 1-pixel-high image with 28 channels
sample_image = data[1].reshape(28, 28)

# the original image followed by 15 transformed versions (a 4x4 grid)
augmented_sample = [sample_image]
augmented_sample.extend(
    transform(image=sample_image)["image"] for _ in range(4*4-1)
)

plot_images(np.array(augmented_sample), dims=(4,4), figsize=(15,15))

In [None]:
# Then create more data and more labels
per_image = 15 # how many transformed images per image

new_data = []
new_labels = []

# iterate the data and labels, with a progress bar for the transformation process
for flat_image, label in tqdm(zip(data, labels), total=labels.shape[0]):
    # albumentations reads arrays as (height, width[, channels]), so the
    # transforms are applied on the (28, 28) image plane
    image = flat_image.reshape(28, 28)
    # the original image followed by its transformed versions
    variants = [image]
    variants.extend(transform(image=image)["image"] for _ in range(per_image))
    # store every image as (1, 28, 28), i.e. with a leading channel dim
    new_data.extend(variant.reshape(1, 28, 28) for variant in variants)
    # every variant keeps the label of its source image
    new_labels.extend([label] * len(variants))

# convert to numpy arrays
new_data = np.array(new_data)
new_labels = np.array(new_labels)
# see the new shapes
new_data.shape, new_labels.shape

## Split the Data

In [None]:
# first hold out 10% of all samples as the test set ...
x_rest, x_test, y_rest, y_test = train_test_split(
    new_data, new_labels, test_size=.1, random_state=13
)
# ... then take 10% of the remainder as the validation set
x_train, x_val, y_train, y_val = train_test_split(
    x_rest, y_rest, test_size=.1, random_state=13
)
# NOTE(review): new_data already contains the augmented copies, so variants
# of the same drawing can end up in different splits; val/test scores may be
# optimistic because of that.

# keep every split in one dictionary
data_dic = dict(
    x_train=x_train, y_train=y_train,
    x_test=x_test, y_test=y_test,
    x_val=x_val, y_val=y_val,
)

tuple(data_dic[key].shape for key in ('x_train', 'x_test', 'x_val'))

## Create the Dataset Class

In [None]:
class DS(Dataset):
    """In-memory dataset of 1x28x28 images with integer class labels.

    `data` and `labels` are numpy arrays; both are converted to tensors once,
    at construction time, and then served by index.
    """

    # ---------- Dataset protocol ----------

    def __init__(self, data, labels):
        self.process_data(data)
        self.process_labels(labels)

    def __len__(self):
        # one label per sample
        return self.labels.size(0)

    def __getitem__(self, idx):
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target

    # ---------- preprocessing ----------

    def process_data(self, data):
        """Store `data` as a tensor of shape (N, 1, 28, 28), divided by 2."""
        images = torch.from_numpy(data)
        n_samples = images.shape[0]
        # the division is the normalization step; presumably the raw values
        # lie in [0, 2] so the result is in [0, 1] - TODO confirm
        self.data = images.view(n_samples, 1, 28, 28) / 2

    def process_labels(self, labels):
        """Store `labels` as an int64 tensor."""
        as_int = labels.astype('int64')
        self.labels = torch.from_numpy(as_int)
        

## USE THE CLASS
# one DS per split, built from the matching x_/y_ arrays in data_dic
data_sets = {
    split: DS(data_dic[f'x_{split}'], data_dic[f'y_{split}'])
    for split in ('train', 'test', 'val')
}

# shapes of the images and labels of the first three training samples
tuple(part.shape for part in data_sets['train'][:3])

In [None]:
# dtypes of the image tensor and the label tensor of one training sample
tuple(part.dtype for part in data_sets['train'][1])

In [None]:
# the max value of a sample batch (first 100 validation images);
# Tensor.max() reduces inside torch instead of building a Python list
# with one element per pixel
data_sets['val'][:100][0].max()

## Dataloaders

In [None]:
# now the dataloaders
batch_size = 128

# Only the training loader shuffles: the one-time shuffle done by
# train_test_split fixes a single order, while shuffle=True draws a new
# order every epoch. The test/val loaders keep a fixed order so that
# evaluation is repeatable.
data_loaders = {
    split: DataLoader(
        dataset=data_sets[split],
        batch_size=batch_size,
        shuffle=(split == 'train'),
    )
    for split in ('train', 'test', 'val')
}

# The Model and Device

In [None]:
# define the model class
class Network(nn.Module):
    """Small CNN: one conv + max-pool stage followed by three dense layers.

    Takes batches of shape (N, 1, 28, 28) and returns (N, 10) raw scores.
    """

    def __init__(self):
        super().__init__()
        # feature extractor: 1 -> 32 channels with a 5x5 kernel, then 2x2 pooling
        self.conv = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.pool = nn.MaxPool2d(2)
        # classifier head; nn.Linear(in_features, out_features).
        # 28x28 -> conv(5) -> 24x24 -> pool(2) -> 12x12, with 32 channels
        self.dense1 = nn.Linear(32*12*12, 256)
        self.dense2 = nn.Linear(256, 256)
        self.dense3 = nn.Linear(256, 10)
        # regularization applied after each hidden dense layer
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Run a batch through the network and return the class scores."""
        # convolution -> relu -> max pooling
        features = self.conv(x)
        features = F.relu(features)
        features = self.pool(features)
        # flatten everything except the batch dimension
        flat = torch.flatten(features, 1)
        # hidden dense layers, each followed by relu and dropout
        hidden = self.dropout(F.relu(self.dense1(flat)))
        hidden = self.dropout(F.relu(self.dense2(hidden)))
        # output layer: raw scores, no activation
        return self.dense3(hidden)

# pick the gpu when one is available, otherwise fall back to the cpu
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# build the network and place it on the selected device
model = Network().to(device)

# quick smoke test of the data flow, if needed:
# model(torch.randn(2,1,28,28).to(device))
device

In [None]:
def plot_train(h, epochs):
    """Plot the training curves stored in a history dictionary.

    Args:
        h: dict with the keys 'loss', 'val loss', 'accuracy' and
            'val accuracy', each holding one value per epoch.
        epochs: number of epochs, i.e. the length of every list in `h`.
    """
    # get an array with the number of epochs
    x = np.arange(epochs)
    plt.figure(figsize=(20,10))

    # (panel title, key of the train curve, key of the validation curve)
    panels = (
        ('Loss', 'loss', 'val loss'),
        ('Accuracy', 'accuracy', 'val accuracy'),
    )
    for position, (panel_title, train_key, val_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(x, h[train_key], label=f'train {train_key}')
        plt.plot(x, h[val_key], label=val_key)
        plt.title(panel_title)
        plt.xlabel('epoch')
        plt.grid(True)
        # the legend belongs to the current subplot, so it is added per panel
        plt.legend()
    plt.show()
    

# define a metric
def check_accuracy(model, loader, criterion, train=True):
    """Evaluate `model` on every batch of `loader`.

    Args:
        model: the `nn.Module` to evaluate.
        loader: iterable yielding `(inputs, targets)` batches.
        criterion: loss function called as `criterion(scores, targets)`.
        train: only selects the prefix of the printed line (train vs. val).

    Returns:
        `(loss, accuracy)` as Python floats. The loss is the per-batch loss
        averaged over all samples (each batch weighted by its size), which is
        the dataset mean when `criterion` returns a per-batch mean.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    # remember the current mode so it can be restored afterwards
    was_training = model.training
    # evaluate on the device that holds the model's weights
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    total_loss = 0.0
    num_correct = 0
    num_samples = 0

    # eval mode changes the behaviour of layers such as Dropout and BatchNorm
    model.eval()
    try:
        # no gradients are needed here; this saves memory and computation
        with torch.no_grad():
            for x, y in loader:
                # move the data and targets to the model's device
                if model_device is not None:
                    x = x.to(model_device)
                    y = y.to(model_device)
                # obtain the scores
                scores = model(x)
                n_batch = y.shape[0]
                # accumulate the loss, weighted by the batch size
                total_loss += criterion(scores, y).item() * n_batch
                # the predicted class is the index of the max score
                preds = scores.argmax(dim=1)
                # count the correct predictions and the samples seen
                num_correct += (preds == y).sum().item()
                num_samples += n_batch
    finally:
        # put the model back in the mode it was in before the evaluation
        model.train(was_training)

    if num_samples == 0:
        raise ValueError('check_accuracy received a loader without samples')

    loss_ = total_loss / num_samples
    acc_ = num_correct / num_samples
    if train:
        print(f'>> loss: {loss_} - accuracy: {acc_}')
    else:
        print(f'==>> val loss: {loss_} - val accuracy: {acc_}')

    return loss_, acc_

In [None]:
def fit(model, loaders, epochs, criterion, optimizer):
    """Train `model` and leave it holding the best weights found.

    After every epoch the model is evaluated with `check_accuracy` on the
    train and validation loaders. Whenever the validation accuracy beats the
    best value seen so far, the weights are saved to './model.pth'. When
    training ends, that checkpoint is loaded back into `model` itself, so the
    caller's model ends up with the best weights.

    Args:
        model: the network to train (expected to live on the notebook-level
            `device`, to which the batches are moved).
        loaders: `(train_loader, val_loader)`.
        epochs: number of passes over the training loader.
        criterion: loss function called as `criterion(scores, targets)`.
        optimizer: optimizer already bound to `model`'s parameters.

    Returns:
        dict with the per-epoch lists 'accuracy', 'val accuracy', 'loss' and
        'val loss'.
    """
    checkpoint_path = './model.pth'
    train_loader, val_loader = loaders
    hist = { # to save the training data
        'accuracy': [],
        'val accuracy': [],
        'loss': [],
        'val loss': [],
    }
    # best validation accuracy so far; None until the first epoch finishes
    best_acc = None

    # a bar for the train epochs
    for epoch in tqdm(range(1, epochs+1)):
        # make sure the model is in train mode, whatever the evaluation
        # of the previous epoch left behind
        model.train()
        for x_batch, y_batch in train_loader:
            # move the data and labels to the device
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            # predict
            y_pred = model(x_batch)
            # calculate the loss
            loss = criterion(y_pred, y_batch)
            # clear the gradient
            optimizer.zero_grad()
            # back prop of the loss
            loss.backward()
            # update the weights
            optimizer.step()

        # evaluate the model on the train set
        l_t, a_t = check_accuracy(model, train_loader, criterion)
        # evaluate the model on the val set
        l_v, a_v = check_accuracy(model, val_loader, criterion, train=False)
        # append the data
        hist['accuracy'].append(a_t)
        hist['loss'].append(l_t)
        hist['val accuracy'].append(a_v)
        hist['val loss'].append(l_v)

        # keep a checkpoint of the best model so far; the first epoch always
        # counts, so a checkpoint exists as soon as one epoch has run
        if best_acc is None or a_v > best_acc:
            torch.save(model.state_dict(), checkpoint_path)
            best_acc = a_v

    # finally restore the best weights into the model that was passed in
    # (nothing to restore when no epoch was run)
    if best_acc is not None:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        print(f'Loaded checkpoint with {best_acc} acc')
    return hist

# training configuration: number of epochs, loss function and optimizer
epochs = 25
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# train the model and keep the per-epoch history
history = fit(
    model=model,
    loaders=(data_loaders['train'], data_loaders['val']),
    epochs=epochs,
    criterion=criterion,
    optimizer=optimizer,
)

In [None]:
# draw the loss and accuracy curves recorded during training
plot_train(h=history, epochs=epochs)

# Evaluate the Model

In [None]:
# loss and accuracy on the held-out test loader
results = check_accuracy(model=model, loader=data_loaders['test'], criterion=criterion)

In [None]:
# turn per-class scores into class indices
def decode(labels):
    """Return, as a numpy array, the index of the largest score in each row.

    `labels` is a 2-D torch tensor with one row of class scores per sample.
    """
    class_ids = labels.argmax(dim=1)
    return class_ids.numpy()

def flatten2d(arr):
    """Concatenate the rows of a 2-level nested iterable into one flat list."""
    return [item for row in arr for item in row]

# obtain the labels for the confusion matrix
labels = []
preds = []

# predictions must be made in eval mode (dropout disabled) and do not need
# gradients
model.eval()
with torch.no_grad():
    for x,y in data_loaders['test']:
        # move the data to the device
        x = x.to(device)
        # predict and return to cpu
        p = model(x).to('cpu')
        # decode and append
        preds.append(list(decode(p)))
        # append the labels
        labels.append(list(y.numpy()))

# flatten the lists
labels = flatten2d(labels)
preds = flatten2d(preds)

labels[:10], preds[:10]

In [None]:
# the ten digit classes, used both for the matrix and for the tick labels
labs = np.arange(10)
# define the matrix with the real classes and the predicted ones; passing
# `labels=labs` keeps it 10x10 even if a class is missing from the test
# split, so the rows/columns always match the tick labels
m = confusion_matrix(labels, preds, labels=labs)
plt.figure(figsize=(20, 8))
# create the plot; the color scheme of a heatmap is chosen with `cmap`
heatmap = sns.heatmap(m, xticklabels=labs, yticklabels=labs, annot=True, fmt='d', cmap='Blues')
# labels for the axes
plt.xlabel('Predicted Label')
plt.ylabel('Real Label')
plt.show()

# Conclusion
This dataset is pretty useful for practicing deep learning. Having 5500 images is a good amount of data, and with data augmentation it's possible to multiply that amount. Also, I think that since the augmentations transform the original images into others that are harder to classify, the model might classify the original images (those without transformations) more easily. Let's try it :3

In [None]:
# these are the original images (every column except 'label')
data = dataset.drop(['label'], axis=1).values.astype('float32')
# and their labels, selected by column name so the result does not depend
# on where the 'label' column sits in the CSV
labels = dataset['label'].values.astype('float32')

data.shape, labels.shape

In [None]:
# then create a new dataset and dataloader; evaluation needs neither
# shuffling nor single-sample batches, so the notebook's batch size is
# reused and the order is kept fixed (faster and repeatable)
ds = DS(data, labels)
loader = DataLoader(ds, batch_size=batch_size, shuffle=False)
# finally check the accuracy
_ = check_accuracy(model, loader, criterion)

Yes, it has higher accuracy and lower loss than the test loader. Thanks for the dataset!! :D