## Model Training

#### In the following section we use a pre-trained ResNet model to apply transfer learning and fine-tune it for our problem.

We first start by importing useful libraries for our project.

In [None]:
import torchvision.models as models
from torchvision import transforms
from torchvision import datasets
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torchvision.io import read_image
import torch.optim as optim
from torch.optim import lr_scheduler

import numpy as np
import pandas as pd

import os
from tqdm.notebook import tqdm

from datetime import datetime

import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns

One of the best available pre-trained models for tasks similar to ours is the ResNet model. Here we use a pre-trained ResNet50 model and fine-tune it in order to solve our problem. Our dataset contains 10 classes, so the final fully-connected layer is replaced by a new 10-output layer; only this new layer and the last residual stage (`layer4`) are trained, while all other parameters stay frozen.

In [None]:
# Start from a ResNet50 carrying pre-trained weights.
model = models.resnet50(pretrained = True)

# Freeze every pre-trained parameter so that it is not updated by the optimizer.
for param in model.parameters():
    param.requires_grad = False

# Replace the classification head with a 10-class one. The input width is read
# from the existing head instead of being hard-coded. The new nn.Linear is
# created after the freeze loop, so its parameters are trainable.
model.fc = nn.Linear(model.fc.in_features, 10)

# Unfreeze the last residual stage so that it is fine-tuned together with the head.
for param in model.layer4.parameters():
    param.requires_grad = True

# NOTE(review): eval() switches BatchNorm layers to inference behaviour; call
# model.train() before fine-tuning if batch statistics are meant to be updated.
model.eval()

Images are resized to fit in the ResNet50 architecture, and normalized for faster computations.

In [None]:
# Per-channel mean and standard deviation handed to transforms.Normalize.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# Folder-based dataset; every image is resized, converted to a tensor and
# then normalized with the statistics above.
data = datasets.ImageFolder(
    '/home/josephmoussa/Desktop/content/imgs/train',
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
)

In [None]:
# Root directory of the training images and the CSV file that lists them.
img = '/home/josephmoussa/Desktop/content/imgs/train'
csv = '/home/josephmoussa/Desktop/content/driver_imgs_list.csv'

In the cell below we create a dataset class, build our dataset and split it into training and validation data.

In [None]:
class Data(Dataset):
    """Image dataset described by a CSV listing.

    Each CSV row describes one image: the value in column 1 is used both as
    the label and as the name of the sub-directory of ``image_dir`` holding
    the file, and the value in column 2 is the file name.

    Args:
        csv_to_read: Path of the CSV file, read with ``pd.read_csv``.
        image_dir: Root directory containing one sub-directory per label.
        transform: Callable applied to the loaded PIL image; skipped if falsy.
        target_transform: Callable applied to the label; skipped if falsy.
    """

    def __init__(self, csv_to_read = csv, image_dir = img, transform = transforms.Compose([transforms.Resize((224, 224)),
                                                                       transforms.ToTensor(),
                                                                       transforms.Normalize(mean, std)]),
                                                            target_transform = None):
        self.img_labels = pd.read_csv(csv_to_read)
        self.img_dir = image_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the CSV listing."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``.

        The label is returned exactly as stored in the CSV unless
        ``target_transform`` is set.
        """
        label = self.img_labels.iloc[idx, 1]
        file_name = self.img_labels.iloc[idx, 2]
        img_path = os.path.join(self.img_dir, str(label), file_name)

        # Force three channels so that the 3-channel Normalize also works for
        # grayscale or RGBA files, and close the file handle once decoded.
        with Image.open(img_path) as opened:
            image = opened.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            label = self.target_transform(label)

        return image, label

In [None]:
# Build the dataset from the image root directory and the CSV listing.
data = Data(image_dir = img, csv_to_read = csv)

In [None]:
# 80 % of the samples go to training, the remainder to validation.
train_data_length = int(0.8 * len(data))
val_data_length = len(data) - train_data_length
lengths = [train_data_length, val_data_length]
# A fixed seed makes the split reproducible: re-running the split yields the
# same partition, so validation images are never images the model trained on.
train_data, val_data = torch.utils.data.random_split(
    data, lengths, generator = torch.Generator().manual_seed(42))

The following function calculates the loss of the model over a dataloader; it is used to compute the validation loss during the training process. The cross-entropy loss is used, as advised by Kaggle.

In [None]:
def calculate_loss(model, dataloader_test, batch_size):
    """Return the summed cross-entropy loss of ``model`` over a dataloader.

    Args:
        model: Network mapping a batch of inputs to per-class scores.
        dataloader_test: Iterable of ``(inputs, labels)`` batches. The class
            index of each label is read from the character at position 1
            (e.g. ``"c3"`` -> 3).
        batch_size: Unused; kept so that existing callers keep working.

    Returns:
        float: Sum over all batches of the per-batch mean cross-entropy loss,
        ``0.0`` when the dataloader yields no batch.
    """
    loss_fn = nn.CrossEntropyLoss()

    # Run on the device the model lives on, so the function works on both
    # CPU and GPU instead of forcing CUDA tensor types.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Accumulate plain floats so that an empty dataloader yields 0.0.
    loss_epoch_test = 0.0

    with torch.no_grad():
        for inputs, labels in dataloader_test:
            target = torch.tensor([int(label[1]) for label in labels],
                                  dtype = torch.long, device = device)
            inputs = inputs.to(device = device, dtype = torch.float32)

            outputs = model(inputs)
            loss_epoch_test += loss_fn(outputs, target).item()

    return loss_epoch_test

In our case we had access to a GPU, which allowed for faster computations. The success rate function is used later to measure our model's performance.

In [None]:
# Use the first CUDA device when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    cuda_cpu = torch.device("cuda:0")
else:
    cuda_cpu = torch.device("cpu")

def success_rate(model, val_data, batch_size):
    """Return the classification accuracy of ``model`` in percent.

    Args:
        model: Network mapping a batch of inputs to per-class scores with the
            classes along dimension 1.
        val_data: Iterable of ``(inputs, labels)`` batches. The class index of
            each label is read from the character at position 1
            (e.g. ``"c3"`` -> 3).
        batch_size: Unused; kept so that existing callers keep working.

    Returns:
        float: ``100 * correct / total`` over every sample seen, or ``0.0``
        when ``val_data`` yields no sample.
    """
    # Run on the device the model lives on instead of forcing CUDA tensors.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    # Count samples directly rather than inferring the total from the number
    # of batches and the size of the last one.
    total = 0

    with torch.no_grad():
        for inputs, labels in val_data:
            inputs = inputs.to(device = device, dtype = torch.float32)
            predictions = model(inputs).argmax(dim = 1).cpu()

            target = torch.tensor([int(label[1]) for label in labels],
                                  dtype = torch.long)

            correct += (predictions == target).sum().item()
            total += len(labels)

    if total == 0:
        return 0.0

    return (correct / total) * 100

With everything ready, we can now train our model, choosing the parameters giving us the best possible overall performance.

In [None]:
def train(model, loss_fn, epochs = 5, batch_size = 200):
    """Fine-tune ``model`` on ``train_data`` and evaluate it on ``val_data``.

    After every epoch the summed training loss, the validation loss
    (``calculate_loss``) and the validation accuracy (``success_rate``) are
    printed; once training is over the three curves are plotted.

    Args:
        model: Network to train; it is moved to ``cuda_cpu``.
        loss_fn: Callable ``loss_fn(outputs, target)`` returning a scalar
            tensor that supports ``backward()``.
        epochs: Number of passes over the training data.
        batch_size: Batch size of the training and validation dataloaders.

    Returns:
        The trained model, left in evaluation mode when ``epochs >= 1``.
    """
    # Honour the CPU fallback of `cuda_cpu` instead of requiring CUDA.
    model = model.to(cuda_cpu)
    optimizer = optim.Adam(model.parameters())
    dataloader_train = DataLoader(train_data, batch_size = batch_size, shuffle=True, num_workers=2)
    dataloader_test = DataLoader(val_data, batch_size = batch_size, shuffle=True, num_workers=2)
    training_loss = []
    validation_loss = []
    # The accuracy curve starts at 10 %, before any epoch has run.
    success_rate_ = [10]

    for epoch in tqdm(range(epochs), desc = "Total Progress: "):
        # Training mode: BatchNorm layers use batch statistics while learning.
        model.train()
        loss_epoch_train = 0.0

        for inputs, labels in dataloader_train:
            # The class index is the character at position 1 of each label.
            target = torch.tensor([int(label[1]) for label in labels],
                                  dtype = torch.long, device = cuda_cpu)
            inputs = inputs.to(device = cuda_cpu, dtype = torch.float32)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = loss_fn(outputs, target)
            loss.backward()
            optimizer.step()

            # .item() keeps a plain float, so the running total does not hold
            # on to the autograd history of every batch.
            loss_epoch_train += loss.item()

        print("Training Loss: {0}".format(loss_epoch_train))
        training_loss.append(loss_epoch_train)

        # Evaluation mode for the validation metrics.
        model.eval()

        val_loss = calculate_loss(model, dataloader_test, batch_size)
        print("Validation Loss: {0}".format(val_loss))
        validation_loss.append(val_loss)

        accuracy_rate = success_rate(model, dataloader_test, batch_size)
        print("Model accuracy: {0} %".format(accuracy_rate))
        success_rate_.append(accuracy_rate)

        print("Epoch done.")
        print("-------------------------------------")


    plt.plot(training_loss)
    plt.xlabel("Epoch number")
    plt.ylabel("Training Loss")
    plt.title("Evolution of training loss with number of epochs")
    plt.show()

    plt.plot(validation_loss)
    plt.xlabel("Epoch number")
    plt.ylabel("Validation Loss")
    plt.title("Evolution of validation loss with number of epochs")
    plt.show()

    plt.plot(success_rate_)
    plt.xlabel("Epoch number")
    plt.ylabel("Model Accuracy (%)")
    plt.title("Evolution of our model's accuracy with number of epochs")
    plt.show()

    return model

In [None]:
# Release cached GPU memory, then fine-tune for 5 epochs with batches of 128
# images using the cross-entropy loss.
torch.cuda.empty_cache()
model = train(
    model,
    loss_fn = nn.CrossEntropyLoss(),
    epochs = 5,
    batch_size = 128,
)

We can save the trained model thanks to the lines of code below.

In [None]:
# Save only the weights (state dict); the file name carries the model name and
# a timestamp of the moment of saving.
main_directory = '/home/josephmoussa/Desktop/'
name = "ResNet50"
file_date = datetime.now().strftime("%Y%m%d_%H%M%S")
torch.save(
    model.state_dict(),
    os.path.join(main_directory, name + file_date + '_weights_final.h5'),
)

## Using Trained Model

#### The following section can be run independently. After saving our trained model in the previous section, we will now use it and evaluate its performance on a test dataset given by Kaggle.

In [None]:
import torchvision.models as models
from torchvision import transforms
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torch
import numpy as np
import pandas as pd
import os

from torchvision.io import read_image
import torch.optim as optim
from torch.optim import lr_scheduler

from tqdm.notebook import tqdm
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Rebuild the architecture the weights were saved from: a ResNet50 with a
# 10-output head. No pre-trained weights are requested, because every
# parameter is overwritten by load_state_dict below.
model = models.resnet50()
model.fc = nn.Linear(model.fc.in_features, 10)

main_directory = '/home/josephmoussa/Desktop/'
# map_location="cpu" lets weights saved from a GPU be loaded on a machine
# without CUDA; the model can be moved to another device afterwards.
model.load_state_dict(torch.load(os.path.join(main_directory, 'ResNet50_20_epochs20220518_211515_weights_res.h5'),
                                 map_location = "cpu"))
# Inference mode for the evaluation cells that follow.
model.eval()

# Per-channel statistics handed to transforms.Normalize.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

In [None]:
# Root directory of the training images and the CSV file that lists them.
img = '/home/josephmoussa/Desktop/content/imgs/train'
csv = '/home/josephmoussa/Desktop/content/driver_imgs_list.csv'

# Per-channel mean and standard deviation handed to transforms.Normalize.
std = (0.229, 0.224, 0.225)
mean = (0.485, 0.456, 0.406)

class Data(Dataset):
    """Image dataset described by a CSV listing.

    Each CSV row describes one image: the value in column 1 is used both as
    the label and as the name of the sub-directory of ``image_dir`` holding
    the file, and the value in column 2 is the file name.

    Args:
        csv_to_read: Path of the CSV file, read with ``pd.read_csv``.
        image_dir: Root directory containing one sub-directory per label.
        transform: Callable applied to the loaded PIL image; skipped if falsy.
        target_transform: Callable applied to the label; skipped if falsy.
    """

    def __init__(self, csv_to_read = csv, image_dir = img, transform = transforms.Compose([transforms.Resize((224, 224)),
                                                                       transforms.ToTensor(),
                                                                       transforms.Normalize(mean, std)]),
                                                            target_transform = None):
        self.img_labels = pd.read_csv(csv_to_read)
        self.img_dir = image_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the CSV listing."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``.

        The label is returned exactly as stored in the CSV unless
        ``target_transform`` is set.
        """
        label = self.img_labels.iloc[idx, 1]
        file_name = self.img_labels.iloc[idx, 2]
        img_path = os.path.join(self.img_dir, str(label), file_name)

        # Force three channels so that the 3-channel Normalize also works for
        # grayscale or RGBA files, and close the file handle once decoded.
        with Image.open(img_path) as opened:
            image = opened.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            label = self.target_transform(label)

        return image, label

csv = '/home/josephmoussa/Desktop/content/driver_imgs_list.csv'
img = '/home/josephmoussa/Desktop/content/imgs/train'
data = Data(csv_to_read = csv, image_dir = img)

# 80 % of the samples go to training, the remainder to validation.
train_data_length = int(0.8 * len(data))
val_data_length = len(data) - train_data_length
lengths = [train_data_length, val_data_length]

# A fixed seed makes the split reproducible across runs.
# NOTE(review): val_data only matches the images held out during training if
# the training run drew its split from the same seeded generator - confirm
# before interpreting the evaluation results.
train_data, val_data = torch.utils.data.random_split(
    data, lengths, generator = torch.Generator().manual_seed(42))

# Use the first CUDA device when CUDA is available, otherwise the CPU.
cuda_cpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In the following cell we read and modify the sample submission file given by Kaggle.

In [None]:
sample_submission = pd.read_csv("/home/josephmoussa/Desktop/content/sample_submission.csv")
len_sample = len(sample_submission)

# The preprocessing pipeline is identical for every image: build it once.
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(),
                               transforms.Normalize(mean, std)])

# The inputs are sent to `cuda_cpu`, so the model must live on the same device.
model = model.to(cuda_cpu)
model.eval()

# Inference only: no gradients are needed.
with torch.no_grad():
    for i in tqdm(range(len_sample), desc = "Total Progress: "):

        # A separate name keeps the global `img` path untouched; RGB conversion
        # guarantees the three channels expected by Normalize.
        with Image.open("/home/josephmoussa/Desktop/content/imgs/test/" + sample_submission.img[i]) as test_image:
            batch = transform(test_image.convert("RGB"))
        batch = batch.to(cuda_cpu).unsqueeze(0)
        output = model(batch)

        # Class probabilities of the single image in the batch.
        pred = torch.softmax(output, dim = 1).cpu().numpy()[0]

        for class_number in range(10):
            sample_submission.at[i, "c" + str(class_number)] = pred[class_number]

        # Early peek at the first rows filled in so far.
        if i == 30:
            print(sample_submission.head(30))

sample_submission.head(20)

In [None]:
# Write the filled-in submission table, without the DataFrame index column.
sample_submission.to_csv(
    "/home/josephmoussa/Desktop/submission_sample_4_resnet50.csv",
    index = False,
)

## Model Evaluation

#### In the following section we plot the model's confusion matrix in order to analyze its performance according to every class.

In [None]:
def conf_mat(model, val_data, batch_size):
    """Plot the 10-class confusion matrix of ``model`` and print per-class accuracy.

    Rows of the matrix are true classes and columns are predicted classes.
    The per-class accuracy printed for class ``i`` is the share of samples
    whose true class is ``i`` that were predicted as ``i``.

    Args:
        model: Network mapping a batch of inputs to per-class scores with the
            classes along dimension 1; it is moved to ``cuda_cpu`` and put in
            evaluation mode.
        val_data: Dataset yielding ``(image, label)`` pairs. The class index
            of each label is read from the character at position 1.
        batch_size: Batch size of the evaluation dataloader.
    """
    nb_classes = 10
    confusion_matrix = np.zeros((nb_classes, nb_classes))
    # The counts do not depend on the sample order, so no shuffling is needed.
    dataloader_test = DataLoader(val_data, batch_size = batch_size, shuffle=False, num_workers=2)

    model = model.to(cuda_cpu)
    model.eval()
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader_test, desc = "Total Progress: "):
            # .to() honours the CPU fallback of `cuda_cpu`.
            inputs = inputs.to(device = cuda_cpu, dtype = torch.float32)
            predictions = model(inputs).argmax(dim = 1).cpu().tolist()

            for label, pred in zip(labels, predictions):
                confusion_matrix[int(label[1])][pred] += 1

    plt.figure(figsize=(15,10))

    class_names = ["Safe driving", "Texting-right", "Talking on the phone-right", 
               "Texting-left", "Talking on the phone-left", "Operating the radio", 
               "Drinking", "Reaching behind", "Hair and Makeup", 
               "Talking to passenger"]

    df_cm = pd.DataFrame(confusion_matrix, index = class_names, columns = class_names).astype(int)
    heatmap = sns.heatmap(df_cm, annot=True, fmt="d")

    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right',fontsize = 8)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right',fontsize = 8)

    # Per-class accuracy
    for i in range(nb_classes):
        row_total = confusion_matrix[i].sum()
        # A class with no sample has no defined accuracy: report nan instead
        # of dividing zero by zero.
        if row_total:
            class_accuracy = 100 * confusion_matrix[i][i] / row_total
        else:
            class_accuracy = float("nan")
        print("For class c{0}, the accuracy is equal to {1} %".format(i, class_accuracy))
        print("---------------------------")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title("Confusion Matrix")
    plt.show()

In [None]:
# Confusion matrix of the loaded model on the validation split, 256 images per batch.
conf_mat(model = model, val_data = val_data, batch_size = 256)