STEP1 - IMPORTED ALL NECESSARY LIBRARIES

In [None]:
import copy
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
from PIL import Image
from sklearn.metrics import confusion_matrix, f1_score, recall_score, precision_score, classification_report, accuracy_score
from torch.utils.data import random_split
from torchvision import transforms

STEP2- CHECKED FOR GPU

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
# Later cells move models and batches onto this device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

`driver_imgs_list.csv` lists every training image together with its subject (driver) ID and its class label ID.

In [None]:
# Load the image index that ships with the competition data and preview its
# first rows (rich display via the cell's last expression).
data = pd.read_csv("../input/state-farm-distracted-driver-detection/driver_imgs_list.csv")
data.head()

In [None]:
# Number of index rows (images) per distinct value of the 'subject' column.
data['subject'].value_counts()

This shows that there are 26 different drivers in the training images.

STEP3- LOADED THE DATASET AND DIVIDED INTO TEST AND TRAIN SET.

In [None]:
# (rows, columns) of the image index.
data.shape

In [None]:

# Preprocessing applied to every image when it is loaded:
# resize, take the central 224x224 crop, convert to a tensor and normalise
# each channel with the mean/std below (the ImageNet statistics that
# torchvision's pretrained models are documented to expect).
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# ImageFolder derives each image's integer label from the sub-directory it is in.
# NOTE(review): presumably imgs/train holds one folder per class (c0..c9) -- confirm.
dataset = torchvision.datasets.ImageFolder(root='../input/state-farm-distracted-driver-detection/imgs/train', transform=transform)

In [None]:
# Derive the 70/30 split from the dataset size instead of hard-coding the
# counts: random_split raises unless the lengths sum to len(dataset), so fixed
# numbers only work for one exact copy of the data.
# The generator is seeded so the same images land in each subset on every
# Restart & Run All.
# NOTE(review): this splits by image, not by driver (the 'subject' column of
# driver_imgs_list.csv), so the same driver can appear in both subsets and the
# reported test accuracy is likely optimistic -- consider a subject-wise split.
n_train = int(0.7 * len(dataset))
train, test = random_split(
    dataset,
    [n_train, len(dataset) - n_train],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
batch_size = 32
# Shuffle only the training batches. Evaluation results do not depend on batch
# order, so the test loader keeps a fixed order for repeatable runs.
trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

In [None]:
# Pull a single batch to sanity-check tensor shapes.
# Use the built-in next(): the iterator's .next() method is not available in
# current PyTorch releases, while next() works on old and new versions alike.
dataiter = iter(trainloader)
images, labels = next(dataiter)

print(images.shape)  # input image shape
print(labels.shape)  # label shape

In [None]:
# Human-readable names for the 10 classes; position i is the name of label id i.
classes = [
    'safe driving',
    'texting - right',
    'talking on the phone - right',
    'texting - left',
    'talking on the phone - left',
    'operating the radio',
    'drinking',
    'reaching behind',
    'hair and makeup',
    'talking to passenger',
]

STEP4- BUILT THE EVALUATION AND TRAINING FUNCTION TO BE FURTHER USED BY THE MODELS.

In [None]:
#evaluation function used to calculate accuracy of a model.
def evaluation(dataloader, model):
    """Return the top-1 accuracy of ``model`` over ``dataloader`` as a percentage.

    Args:
        dataloader: iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` holds integer class indices.
        model: ``nn.Module`` whose output has one score per class along dim 1.

    Returns:
        ``100 * correct / total`` as a float, or ``0.0`` when the dataloader
        yields no samples.

    Side effects:
        Leaves ``model`` in eval mode.
    """
    # Run on whatever device the model already lives on, so the function does
    # not depend on a notebook-global; parameter-free models fall back to CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    total, correct = 0, 0
    model.eval()
    # No gradients are needed for scoring; disabling autograd avoids building
    # (and holding in memory) a graph for every evaluation batch.
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)
            outputs = model(inputs)
            _, pred = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (pred == labels).sum().item()
    if total == 0:
        # Nothing to score: report 0 instead of dividing by zero.
        return 0.0
    return 100 * correct / total

In [None]:
#train function to train a particular model. The number of epochs defaults to 8.
def train(model, max_epochs=8):
    """Train ``model`` in place and plot the mean training loss per epoch.

    Relies on notebook globals set by the calling cell: ``trainloader`` and
    ``testloader`` (batches of ``(inputs, labels)``), ``opt`` (an optimizer
    built over ``model``'s parameters) and ``loss_fn`` (the criterion), plus
    the ``evaluation`` helper.

    Args:
        model: the ``nn.Module`` to optimise; batches are moved to the device
            its parameters are on.
        max_epochs: number of full passes over ``trainloader`` (default 8).

    Side effects:
        Updates the model weights, prints every new minimum batch loss and a
        per-epoch accuracy line, shows a loss plot, and leaves ``model`` in
        eval mode.
    """
    run_device = next(model.parameters()).device
    loss_epoch_arr = []
    min_loss = 1000
    for epoch in range(max_epochs):
        # evaluation() switches the model to eval mode at the end of every
        # epoch, so training mode has to be re-enabled here.
        model.train()
        running_loss, n_batches = 0.0, 0
        for inputs, labels in trainloader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)
            opt.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            opt.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            n_batches += 1
            if min_loss > batch_loss:
                min_loss = batch_loss
                print('Min loss %0.2f' % min_loss)

        # Track the mean loss over the epoch; a single (last) batch is too
        # noisy to represent it. Skipped when the loader yielded nothing.
        if n_batches:
            loss_epoch_arr.append(running_loss / n_batches)
        # Release cached GPU blocks once per epoch rather than once per batch.
        torch.cuda.empty_cache()
        model.eval()
        print('Epoch: %d/%d, Test acc: %0.2f, Train acc: %0.2f' % (
            epoch + 1, max_epochs,
            evaluation(testloader, model), evaluation(trainloader, model)))
    plt.plot(loss_epoch_arr)
    plt.show()

STEP5- CREATED MODELS-

OUR FIRST MODEL IS BUILT USING TRANSFER LEARNING. A RESNET MODEL IS LOADED AND SOME EXTRA LAYERS ARE ADDED TO THE END. WE NAMED THIS AS 'model1'.

In [None]:
# Load a ResNet-50 with pretrained weights; progress=True shows a progress bar
# while the weights download.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm against the installed version before changing.
model1 = torchvision.models.resnet50(pretrained=True, progress=True)

In [None]:
# Freeze every parameter of the pretrained network so no gradients are computed
# for them. Layers created after this point are trainable by default.
for p in model1.parameters():
    p.requires_grad = False

In [None]:
# Replace ResNet's final fully connected layer with a new, trainable head:
# 2048 -> 1024 -> 512 -> 10 (one output per class), with ReLU in between.
model1.fc = nn.Sequential(nn.Linear(2048, 1024),
                           nn.ReLU(),
                           nn.Linear(1024, 512),
                           nn.ReLU(),                       
                           nn.Linear(512, 10),
)

In [None]:
# Sanity check: print the shape of every parameter that will still be trained.
# Only the weights and biases of the new fc head should be listed.
for param in model1.parameters():
    if param.requires_grad:
        print(param.shape)

In [None]:
#Trained model1
learning_rate = 0.001
momentum = 0.9  # NOTE: not passed to the Adam optimizer below; currently unused
model1 = model1.to(device)
# The backbone is frozen, so hand the optimizer only the parameters that still
# require gradients (the new fc head) instead of every parameter of the network.
opt = optim.Adam([p for p in model1.parameters() if p.requires_grad], lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
train(model1)

BUILT SECOND MODEL WHICH CONTAINS VARIOUS CONVOLUTIONAL LAYERS AND LINEAR LAYERS. WE NAMED IT AS 'model2'

In [None]:
# second model
class Net(nn.Module):
    """CNN built from scratch: five conv stages followed by a 3-layer classifier.

    Each stage is Conv2d(5x5, stride 1, padding 2) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, 2). The convolution keeps the spatial size and the pooling
    halves it, so a (N, 3, 224, 224) input becomes (N, 256, 7, 7) after the
    five stages (224 -> 112 -> 56 -> 28 -> 14 -> 7). The flattened 12544
    features then go through 12544 -> 1024 -> 512 -> 10 linear layers.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) of the convolution in each stage.
        stage_channels = [(3, 32), (32, 64), (64, 128), (128, 256), (256, 256)]
        conv_layers = []
        for in_ch, out_ch in stage_channels:
            conv_layers.extend([
                nn.Conv2d(in_ch, out_ch, 5, stride=(1, 1), padding=2),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.convblock = nn.Sequential(*conv_layers)

        flat_features = 256 * 7 * 7  # channels * height * width after the last pool
        self.denseblock = nn.Sequential(
            nn.Linear(flat_features, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Map a batch of images to per-class scores of shape (N, 10)."""
        features = self.convblock(x)
        # Collapse (N, C, H, W) into (N, C*H*W) for the linear layers.
        flattened = features.view(features.size(0), -1)
        return self.denseblock(flattened)

In [None]:
#Trained model2
model2 = Net()
model2 = model2.to(device)
learning_rate = 0.001
momentum = 0.9  # NOTE: not passed to the Adam optimizer below; currently unused
# train() reads the notebook-level `opt` and `loss_fn`, so both are rebound to
# model2 here before training starts.
opt = optim.Adam(model2.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
train(model2)

STEP6- COMPARED THE MODELS BY PERFORMANCE METRICS.

In [None]:
#A function to plot confusion matrix.
def plot_confusion_matrix(cm, classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.

    Args:
        cm: 2-D array of counts; rows are drawn as true labels and columns as
            predicted labels.
        classes: tick labels, one per row/column of ``cm``.
        normalize: when True, every row is divided by its total so each cell
            shows the fraction of that true class; rows whose total is zero
            are shown as all zeros.
        title: figure title.
        cmap: matplotlib colormap used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples has a zero row total; divide only where
        # the total is non-zero so such rows become 0 instead of NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    plt.figure(figsize=(20,10))

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so it stays readable.
    thresh = cm.max() / 2.
    for i, j in np.ndindex(*cm.shape):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
#A function used to get PRECISION, RECALL, F1_SCORE and CONFUSION MATRIX.
def metrics(model):
    """Print classification metrics for ``model`` on the test set and plot its confusion matrix.

    Relies on notebook globals: ``testloader`` (batches of ``(inputs, labels)``),
    ``classes`` (label id -> class name) and ``plot_confusion_matrix``.

    Args:
        model: trained ``nn.Module``; batches are moved to the device its
            parameters are on.

    Side effects:
        Puts ``model`` in eval mode, prints the sklearn classification report
        plus weighted precision / recall / F1 and accuracy, and draws the
        normalised confusion matrix.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    y_true = []
    y_pred = []
    # Score in eval mode with autograd disabled, so the result does not depend
    # on the mode the model was left in and no graph is kept per batch.
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            outputs = model(inputs.to(run_device))
            _, pred = torch.max(outputs, 1)
            # Translate integer ids to class names one batch at a time.
            y_true.extend(classes[idx] for idx in labels.tolist())
            y_pred.extend(classes[idx] for idx in pred.tolist())
    f1 = f1_score(y_true, y_pred, average='weighted')
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    acc = accuracy_score(y_true, y_pred)
    print(classification_report(y_true, y_pred))
    print("Precision is ", round(precision,2))
    print("Recall is ", round(recall,2))
    print("f1 score is ", round(f1,2))
    print("accuracy score is ", round(acc,2))
    # Passing labels=classes fixes the row/column order to the class-id order.
    cm = confusion_matrix(y_true, y_pred, labels = classes)
    plot_confusion_matrix(cm, classes)

In [None]:
# Report test-set metrics and the confusion matrix for the transfer-learning model.
print("Performance metrics of model made using transfer learning-")
metrics(model1)

In [None]:
# Report the same test-set metrics for the CNN trained from scratch.
print("Performance metrics of self-made model-")
metrics(model2)