In [None]:
import numpy as np
import pandas as pd


import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataloader import DataLoader
from torch import nn, optim

from torchvision import transforms, models
from torchvision.utils import make_grid

from sklearn.metrics import confusion_matrix, precision_score, recall_score
from scipy.io import loadmat
from PIL import Image
from tqdm import tqdm

import os

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Show which datasets are mounted under the input directory.
print(os.listdir('../input/'))

# Dataset locations. File names are appended directly to these strings,
# so the trailing slash is required.
path_devkit = '../input/devkit-car-dataset/'
path_train_img = '../input/stanford-cars-dataset/cars_train/cars_train/'
path_test_img = '../input/stanford-cars-dataset/cars_test/cars_test/'

# Experiment constants.
NUM_CLASSES = 196   # number of output units of the classifier head
IMG_SIZE = 224      # target image side length in pixels
RANDOM_SEED = 42    # seed for the train/validation split and the RNGs below

# Seed the numpy and torch generators so weight initialisation, shuffling and
# random augmentation are repeatable across "Restart & Run All".
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

In [None]:
# Read the class-name table from the devkit metadata into a DataFrame.
mat = loadmat(path_devkit+'cars_meta.mat')

# Element 0 of every entry in mat['class_names'][0] is taken as the class name.
class_name_list = [entry[0] for entry in mat['class_names'][0]]
classes = pd.DataFrame(class_name_list, columns=['class_name'])

classes.head()

In [None]:
# Load the training annotations into a DataFrame, one row per annotation record.
mat = loadmat(path_devkit+'cars_train_annos.mat')

# Fields 0-4 of a record are unwrapped with [0][0] and field 5 (the file name)
# with [0]. They map positionally onto the column names given below:
# min_x, min_y, max_x, max_y, class_name, file_name.
train_records = [
    [record[i][0][0] for i in range(5)] + [record[5][0]]
    for record in mat['annotations'][0]
]

train_df = pd.DataFrame(train_records,
                        columns=['min_x', 'min_y', 'max_x', 'max_y','class_name', 'file_name'])
train_df.head()

In [None]:
# Load the labelled test annotations into a DataFrame, one row per annotation record.
mat = loadmat(path_devkit+'cars_test_annos_withlabels.mat')

# Fields 0-4 of a record are unwrapped with [0][0] and field 5 (the file name)
# with [0]. They map positionally onto the column names given below:
# min_x, min_y, max_x, max_y, class_name, file_name.
test_records = [
    [record[i][0][0] for i in range(5)] + [record[5][0]]
    for record in mat['annotations'][0]
]

test_df = pd.DataFrame(test_records,
                       columns=['min_x', 'min_y', 'max_x', 'max_y', 'class_name', 'file_name'])
test_df.head()

In [None]:
# Hold out 20% of the annotated training rows (sampled with a fixed seed) as
# the validation split; the rows that were not sampled remain the training split.
# NOTE: train_df is reassigned from itself, so re-running this cell shrinks it again.
val_df = train_df.sample(frac=0.2, random_state=RANDOM_SEED)
train_df = train_df.drop(index=val_df.index).reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

split_report = 'Training set: {}\nValidation set: {}'.format(train_df.shape, val_df.shape)
print(split_report)

In [None]:
# Display one training image and print one entry of the class table.
sample_path = path_train_img+'00001.jpg'
img = plt.imread(sample_path)
plt.imshow(img)
# NOTE(review): row 13 is presumably the class of this image -- verify against the annotations.
print(classes.iloc[13])

In [None]:
# Quick EDA: summary statistics of the training frame's columns
# (bounding-box coordinates and class ids).
train_df.describe()

In [None]:
# print(train_df.info())
# types={'min_x':np.int16, 'min_y':np.int16, 'max_x':np.int16, 'max_y':np.int16,'class_name':np.int8}
# train_df = train_df.astype(types)
# print(train_df.info())

In [None]:
class CarDataset(Dataset):
    """Dataset of bounding-box-cropped car images with one-hot class labels.

    Args:
        df: DataFrame with columns ``min_x``, ``min_y``, ``max_x``, ``max_y``,
            ``class_name`` (1-based class id) and ``file_name``.
        classes: Sized collection of class names; its length sets the width
            of the one-hot label vector.
        transforms: Optional callable applied to the cropped PIL image.
        mode: ``'train'`` reads images from ``path_train_img``, ``'test'``
            from ``path_test_img`` (unless ``img_dir`` is given).
        img_dir: Optional directory prefix (including the trailing separator)
            that overrides the mode-based default.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'test'``.
    """

    def __init__(self, df, classes, transforms=None, mode='train', img_dir=None):
        if mode not in ('train', 'test'):
            raise ValueError("mode must be 'train' or 'test', got {!r}".format(mode))
        if img_dir is None:
            # Resolve the default lazily so an explicit img_dir never needs the globals.
            img_dir = path_test_img if mode == 'test' else path_train_img

        # Use the frame that was passed in (not the module-level train_df).
        self.df = df
        self.classes = classes
        self.num_classes = len(classes)
        self.transforms = transforms
        self.mode = mode
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of annotated images in the frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``.

        ``image`` is the cropped RGB image, passed through ``transforms`` when
        they are set; ``label`` is a float32 one-hot vector of length
        ``num_classes``.
        """
        row = self.df.iloc[index]

        # convert('RGB') guarantees three channels even for greyscale files.
        img = Image.open(self.img_dir + row['file_name']).convert('RGB')

        # One-hot label; class ids in the annotations are 1-based.
        # float32 matches the dtype of the model output expected by the loss.
        label = np.zeros(self.num_classes, dtype=np.float32)
        label[int(row['class_name']) - 1] = 1

        # Crop to the annotated bounding box (left, upper, right, lower).
        img = img.crop((row['min_x'], row['min_y'], row['max_x'], row['max_y']))

        if self.transforms is not None:
            img = self.transforms(img)

        # Always return a sample, with or without transforms.
        return img, label

In [None]:
BATCH_SIZE = 32

# Training pipeline: resize, random horizontal flip (augmentation), to tensor.
transform = transforms.Compose([transforms.Resize((IMG_SIZE, IMG_SIZE)),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor()])
                                # normalisation later

# Evaluation pipeline: same preprocessing without the random flip, so
# validation/test metrics are deterministic.
eval_transform = transforms.Compose([transforms.Resize((IMG_SIZE, IMG_SIZE)),
                                     transforms.ToTensor()])

train_dataset = CarDataset(train_df, classes, transforms = transform)
train_generator = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# validation data generator (no shuffling needed for evaluation)
val_dataset = CarDataset(val_df, classes, transforms = eval_transform)
val_generator = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# test data generator
# NOTE(review): CarDataset is used in its default mode here, which reads files
# relative to path_train_img, while the test images live under path_test_img.
# TODO: point this dataset at the test image directory.
test_dataset = CarDataset(test_df, classes, transforms = eval_transform)
test_generator = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Pull a single batch from the training loader to sanity-check tensor shapes.
first_batch = next(iter(train_generator))
images, labels = first_batch
shape_report = 'Image shape: {}\nLabels shape: {}'.format(images.shape, labels.shape)
print(shape_report)

In [None]:
# Tile the sampled batch into a grid with 8 images per row and show it without axes.
plt.figure(num=None, figsize=(8, 8), dpi=100, facecolor='w', edgecolor='k')
grid = make_grid(images, nrow=8)
# Move axis 0 to the end before plotting (same reordering as transpose((1,2,0))).
grid_for_plot = grid.permute(1, 2, 0).numpy()
plt.imshow(grid_for_plot)
plt.axis('off')

In [None]:
# network: pretrained DenseNet-121 whose classifier is replaced by a
# NUM_CLASSES-way linear layer followed by a sigmoid.
model = models.densenet121(pretrained=True)
classifier_head = nn.Linear(1024, NUM_CLASSES)
# need to change this to softmax as we also need to predict the confidence score.
model.classifier = nn.Sequential(classifier_head, nn.Sigmoid())


# class customDensenet121(nn.Module):
#     def __init__(self,num_classes):
#         super(customDensenet121,self).__init__()
#         self.features = nn.Sequential(*list(model.features.children()))
#         self.classifier = nn.Sequential(nn.Linear(1024,num_classes),
#                                         nn.Sigmoid())
        
#     def forward(self, x):
#         x = self.features(x)
#         x = x.view(x.size(0),-1)
#         x = self.classifier(x)
#         return x

In [None]:
# Move the model to the GPU only when CUDA is actually usable.
# is_available must be *called*: the bare function object is always truthy.
use_gpu = torch.cuda.is_available()
if use_gpu:
    model.cuda()

In [None]:
# helper functions
def accuracy(y, pred_y, dim=1):
    """Fraction of entries where the arg-max of ``pred_y`` equals that of ``y``.

    The arg-max of both tensors is taken along ``dim``; the number of matching
    positions is divided by ``len(y)`` (the size of the first dimension of ``y``).
    Returns a Python float.
    """
    true_idx = torch.max(y, dim=dim)[1]
    pred_idx = torch.max(pred_y, dim=dim)[1]
    hits = (pred_idx == true_idx).sum().item()
    return hits / len(y)


def precision(y, pred_y, dim=1):
    """Micro-averaged precision between the arg-max classes of ``y`` and ``pred_y``.

    Both tensors are reduced to class indices along ``dim`` and converted with
    ``.numpy()`` before being handed to sklearn's ``precision_score``.
    """
    true_idx = torch.max(y, dim=dim)[1].numpy()
    pred_idx = torch.max(pred_y, dim=dim)[1].numpy()
    return precision_score(true_idx, pred_idx, average='micro')

def recall(y, pred_y, dim=1):
    """Micro-averaged recall between the arg-max classes of ``y`` and ``pred_y``.

    Both tensors are reduced to class indices along ``dim`` and converted with
    ``.numpy()`` before being handed to sklearn's ``recall_score``.
    """
    true_idx = torch.max(y, dim=dim)[1].numpy()
    pred_idx = torch.max(pred_y, dim=dim)[1].numpy()
    return recall_score(true_idx, pred_idx, average='micro')
    

In [None]:
def train_model(model, epochs, train_generator, val_generator, use_gpu, optimizer, criterion):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Args:
        model: Network to train; expected to already live on the device
            implied by ``use_gpu``.
        epochs: Number of passes over ``train_generator``.
        train_generator: Iterable of ``(images, labels)`` training batches.
        val_generator: Iterable of ``(images, labels)`` validation batches.
        use_gpu: When true, batches are moved to CUDA before the forward pass.
        optimizer: Optimizer stepping the model parameters.
        criterion: Loss called as ``criterion(output, labels)``.

    Returns:
        Tuple ``(train_loss, val_loss, val_acc)`` of per-epoch lists of Python
        floats; each value is the mean over the batches of that epoch.

    Side effects:
        Prints per-epoch metrics and writes ``checkpoint_epoch_<n>.pth`` to the
        working directory every 5th epoch.
    """
    device = torch.device('cuda' if use_gpu else 'cpu')
    train_loss, val_loss = [], []
    val_acc = []

    for epoch in range(epochs):
        # Enter training mode explicitly so the first epoch does not depend on
        # the mode the caller left the model in.
        model.train()
        running_loss = 0.0
        for images, labels in train_generator:
            images = images.to(device)
            # The loss needs float targets on both CPU and GPU.
            labels = labels.to(device).float()

            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        acc = 0
        prec = 0
        rcl = 0
        test_loss = 0.0
        model.eval()
        with torch.no_grad():
            for images, labels in val_generator:
                images = images.to(device)
                labels = labels.to(device).float()

                output = model(images)
                # .item() keeps the running total a plain float instead of a
                # (possibly CUDA) tensor.
                test_loss += criterion(output, labels).item()

                # The metric helpers call .numpy(), which needs CPU tensors.
                output = output.cpu()
                labels = labels.cpu()
                acc += accuracy(labels, output)
                prec += precision(labels, output)
                rcl += recall(labels, output)

        if (epoch+1) % 5 == 0:
            torch.save(model.state_dict(), 'checkpoint_epoch_{}.pth'.format(epoch+1))

        train_loss.append(running_loss/len(train_generator))
        val_loss.append(test_loss/len(val_generator))
        val_acc.append(acc/len(val_generator))

        print('Epoch: {}/{}'.format(epoch+1,epochs),
              'Training Loss: {}\t\tValidation Loss: {}'.format(train_loss[-1], val_loss[-1]),
              'Val Accuracy:  {}'.format(val_acc[-1]),
              'Val Precision: {}'.format(prec/len(val_generator)),
              'Val Recall:    {}'.format(rcl/len(val_generator)),sep='\n')

    # Leave the model in training mode, as the original end-of-epoch call did.
    model.train()
    return train_loss,val_loss,val_acc

In [None]:
epochs = 2

criterion = nn.BCELoss()

# Two parameter groups: the feature extractor uses the optimizer-wide default
# learning rate (0.0001); the classifier overrides it with 0.001.
param_groups = [{'params': model.features.parameters()},
                {'params': model.classifier.parameters(), 'lr': 0.001}]
optimizer = optim.Adam(param_groups, lr=0.0001)

history = train_model(model, epochs, train_generator, val_generator,
                      use_gpu, optimizer, criterion)
train_losses, val_losses, val_acc = history

In [None]:
# Per-epoch loss curves. Entries are passed through float() so that 0-dim
# tensors (including CUDA tensors) are plotted as plain numbers.
plt.plot([float(v) for v in train_losses], label='Training loss')
plt.plot([float(v) for v in val_losses], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(frameon=False)

In [None]:
# Validation accuracy per epoch, plotted against the epoch index.
epoch_index = range(len(val_acc))
plt.plot(epoch_index, val_acc, label='Val Accuracy')
plt.legend(frameon=False)

In [None]:
def test_accuracy(model, test_generator, use_gpu):
    """Return the mean per-batch accuracy of ``model`` over ``test_generator``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_generator: Iterable of ``(images, labels)`` batches.
        use_gpu: When true, batches are moved to CUDA before the forward pass.

    Returns:
        Sum of the per-batch accuracies divided by ``len(test_generator)``.
    """
    device = torch.device('cuda' if use_gpu else 'cpu')
    acc = 0
    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for images, labels in test_generator:
            images = images.to(device)
            labels = labels.to(device).float()

            output = model(images)
            # Argument order follows accuracy(y, pred_y).
            acc += accuracy(labels, output)

    return acc/len(test_generator)

# Evaluate the trained model on the test loader; the returned accuracy is shown as the cell output.
test_accuracy(model, test_generator, use_gpu)    