<a href="https://colab.research.google.com/github/marmal88/Cars/blob/main/Cars.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Car Identification dataset

In [1]:
# Colab only: mount Google Drive so files under /content/drive/MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
jupyter nbconvert Cars.ipynb --to python
pipreqs --force .
!pip install -r requirements.txt

SyntaxError: invalid syntax (3716757621.py, line 1)

# Preliminary EDA from csv

This notebook was developed on Google Colab, so the file paths used here may differ from the data layout of the repository.

In [None]:
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import numpy as np
import os

In [None]:
# Class-name lookup: the file has no header row, so column 0 is given a name and
# the index is shifted to start at 1 (it is later joined against the "class" ids).
names_df = pd.read_csv(
    '/content/drive/MyDrive/cars/annotations/class_names.csv',
    header=None,
).rename(columns={0: "class_names"})
names_df.index = np.arange(1, len(names_df) + 1)

# Per-image annotations; this file is ';'-separated.
data_df = pd.read_csv(
    '/content/drive/MyDrive/cars/annotations/cars_annos.csv',
    sep=";",
)

In [None]:
# Attach the readable class name to every annotation row by joining the
# "class" id against the index of the class-name table.
df = data_df.merge(names_df, how='inner', left_on="class", right_index=True)

# Normalise every column name to lower case.
col = {name: name.lower() for name in df.columns}
df = df.rename(columns=col)

df.head()

Unnamed: 0,image,x1,y1,x2,y2,class,test,class_names
0,000001.jpg,112,7,853,717,1,0,AM General Hummer SUV 2000
1,000002.jpg,48,24,441,202,1,0,AM General Hummer SUV 2000
2,000003.jpg,7,4,277,180,1,0,AM General Hummer SUV 2000
3,000004.jpg,33,50,197,150,1,0,AM General Hummer SUV 2000
4,000005.jpg,5,8,83,58,1,0,AM General Hummer SUV 2000


In [None]:
# Sanity check: (rows, columns) of the merged annotation table.
df.shape

(16185, 8)

In [None]:
# Check that the predefined split is usable: every class should be present in
# both the train and the test subset (the "test" column flags test rows with 1).
test_df = df.loc[df["test"] == 1]
train_df = df.loc[df["test"] == 0]

print(f"total number of classes entire dataset {df['class'].nunique()}")
print(f'total number of classes in test set {test_df["class"].nunique()}')
# Fixed label: this line reports the train split, not the test split.
print(f'total number of classes in train set {train_df["class"].nunique()}')

total number of classes entire dataset 196
total number of classes in test set 196
total number of classes in test set 196


In [None]:
# Persist each split to its own CSV. The test file must be written from test_df;
# writing train_df to both paths would make the "test" set a copy of the training data.
train_df.to_csv("/content/drive/MyDrive/cars/annotations/train_df.csv", index=False) # "data/annotations/train_df.csv"
test_df.to_csv("/content/drive/MyDrive/cars/annotations/test_df.csv", index=False)  # "data/annotations/test_df.csv"

In [None]:
# Random check to see if bounding boxes are able to completely cover the car in question.
# Draws a handful of randomly chosen images side by side with their annotated box.
n_samples = 5
fig, axes = plt.subplots(1, n_samples, figsize=(20, 20))

for ax in axes:
    # Pick a random row and read fields by column name so the check does not
    # depend on the column order of the merged frame.
    row = df.iloc[np.random.randint(len(df))]
    img_name, class_names = row["image"], row["class_names"]
    x1, y1, x2, y2 = row["x1"], row["y1"], row["x2"], row["y2"]

    width = x2 - x1
    height = y2 - y1
    print(img_name, x1, y1, x2, y2, width, height, class_names)

    img = Image.open(f'/content/drive/MyDrive/cars/car_ims/{img_name}')
    ax.imshow(img)
    # Rectangle takes the top-left corner plus width/height, not two corners.
    rect = Rectangle((x1, y1), width, height, linewidth=1.5, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    ax.set_title(f"{class_names}")

plt.show()

007365.jpg 9 60 294 183 285 123 Dodge Dakota Crew Cab 2010


FileNotFoundError: ignored

<Figure size 2000x2000 with 0 Axes>

In [None]:
# List the distinct file extensions found in the "image" column.
# Note: this inspects file names only; it does not decode the images, so it
# cannot by itself tell RGB from RGBA content.
df["image"].map(lambda fname: os.path.splitext(fname)[1]).unique()

array(['.jpg'], dtype=object)

# Model Training: Data Preparation



In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import torchvision
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T, utils 
import torchvision.transforms.functional as TF
import torch.nn as nn
from torch.optim import Adam
from torchvision.models import resnet101, ResNet101_Weights
from PIL import Image
import matplotlib.pyplot as plt
from typing import Dict
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm 

In [3]:
# Training configuration.
epochs = 30
batch_size = 32

# Annotation file for the training split (repository equivalent: data/annotations/train_df.csv).
train_df_path = "/content/drive/MyDrive/cars/annotations/train_df.csv"
train_df = pd.read_csv(train_df_path)

# The number of distinct class ids determines the size of the classifier head.
num_class = train_df["class"].nunique()
print(f"Number of classes is {num_class}")

Number of classes is 196


In [4]:
class Dataset(Dataset):
    """Image-classification dataset driven by an annotation CSV.

    Each item is an ``(image, label)`` pair. The image is read from ``root_dir``,
    optionally cropped to its annotated bounding box, then passed through
    ``transform``. The label is the CSV ``class`` value minus ``label_offset``.

    The annotation CSV must provide the columns ``image``, ``x1``, ``y1``,
    ``x2``, ``y2`` and ``class``.

    NOTE(review): the class keeps the name ``Dataset`` (shadowing the torch base
    class it derives from) because other cells construct it under that name.
    """

    def __init__(self, csv_file:str, root_dir:str, transform=None, custom_crop=False,
                 label_offset:int=1) -> None:
        """ Curated Dataset function
        Args:
            csv_file (str): Path to the csv file with annotations.
            root_dir (str): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on an image. Defaults to None.
            custom_crop (bool, optional): Crop each image to its bounding box before the transform. Defaults to False.
            label_offset (int, optional): Value subtracted from the CSV class id. The annotation
                ids start at 1, while ``nn.CrossEntropyLoss`` expects class indices in
                ``[0, num_classes - 1]``; the default of 1 maps ids 1..N onto 0..N-1.
                Pass 0 to return the raw ids. Add the offset back when mapping a
                prediction to a class id.
        """
        self.dataframe = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.custom_crop = custom_crop
        self.label_offset = label_offset

    def __len__(self) -> int:
        """Number of annotated images."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Position of the sample; a tensor index is converted to a plain value first.
        Returns:
            Tuple of (image after optional crop/transform, label as a scalar integer tensor).
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.dataframe.iloc[idx]
        img_name = os.path.join(self.root_dir, row["image"])
        # Force three channels so every sample has the same layout.
        img = Image.open(img_name).convert("RGB")

        if self.custom_crop:
            img = self._custom_crop(
                img, int(row["x1"]), int(row["y1"]), int(row["x2"]), int(row["y2"])
            )

        if self.transform:
            img = self.transform(img)

        return img, self._encode_label(row["class"])

    def _encode_label(self, label) -> torch.Tensor:
        """Convert a CSV class id into the integer tensor returned to the caller."""
        return torch.tensor(int(label) - self.label_offset)

    def _custom_crop(self, img, left, top, right, bottom):
        """ Custom crop function to return the boundary box image
        Args:
            img (Image): image to crop
            left, top (int): coordinates of the top-left corner of the box
            right, bottom (int): coordinates of the bottom-right corner of the box
        Returns:
            Image: returns the cropped image
        """
        width = right - left
        height = bottom - top
        # TF.crop takes (top, left, height, width), not two corners.
        img = TF.crop(img, top, left, height, width)

        return img


In [5]:
# Channel statistics of the ImageNet data the pretrained ResNet was trained on
# (https://pytorch.org/hub/pytorch_vision_resnet/); inputs are normalised with the same values.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then normalise.
transform = T.Compose(
    [
        T.Resize([224, 224]),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std),
    ]
)

# Whether the dataset crops each image to its bounding box first.
custom_crop = True

In [6]:
path_to_data = os.path.join(os.getcwd(), "drive/MyDrive/cars/car_ims")
dataset = Dataset(csv_file=train_df_path, root_dir=path_to_data, transform=transform, custom_crop=custom_crop)

# 80/20 train/validation split. The validation size is the remainder so the two
# sizes always add up to len(dataset), which random_split requires.
train_num = int(round(0.8 * len(dataset)))
valid_num = len(dataset) - train_num
print(f"Number of images in train set {train_num} and validation is {valid_num}")

# Seed the split so the same images land in validation on every run; otherwise
# validation metrics from different runs are not comparable.
train_set, valid_set = torch.utils.data.random_split(
    dataset,
    [train_num, valid_num],
    generator=torch.Generator().manual_seed(42),
)

train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=2, shuffle=True)
# Shuffling has no benefit for evaluation; keep validation order fixed.
valid_loader = DataLoader(valid_set, batch_size=batch_size, num_workers=2, shuffle=False)

Number of images in train set 6515 and validation is 1629


In [7]:
# for i, (img, label) in enumerate(train_loader):
#     if i==1:
#       print(img)
#       print(label)
#       break

# Model Training

In [8]:
class ClassifierModel(nn.Module):
    """ResNet-101 (ImageNet V2 weights) whose final layer is replaced by dropout + linear."""

    def __init__(self, num_class:int):
        """ Builds the pretrained backbone and swaps in a new classification head.
            The original fully connected layer is replaced by a Dropout(0.5)
            followed by a Linear layer with num_class outputs.
        Args:
            num_class (int): The number of classes in the classification problem.
        """
        super().__init__()
        backbone = resnet101(weights=ResNet101_Weights.IMAGENET1K_V2)
        head = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(backbone.fc.in_features, num_class),
        )
        backbone.fc = head
        self.model = backbone

    def forward(self, x:torch.Tensor)-> torch.Tensor:
        """Run the input batch through the backbone and return its output."""
        return self.model(x)

In [9]:

# Instantiate the network and place it on the GPU when one is present.
model = ClassifierModel(num_class)
model = model.to("cuda") if torch.cuda.is_available() else model

# Cross-entropy objective optimised with Adam at a learning rate of 1e-3.
optimizer = Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

Downloading: "https://download.pytorch.org/models/resnet101-cd907fc2.pth" to /root/.cache/torch/hub/checkpoints/resnet101-cd907fc2.pth
100%|██████████| 171M/171M [00:02<00:00, 88.8MB/s]


In [None]:
# Train for `epochs` epochs, validate after each one, log metrics to TensorBoard
# and save a checkpoint whenever the validation loss improves.
writer = SummaryWriter(log_dir="/content/drive/MyDrive/cars/runs")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Make sure the checkpoint directory exists before the first save.
model_dir = "/content/drive/MyDrive/cars/models"
os.makedirs(model_dir, exist_ok=True)

# Best validation loss seen so far; must exist before the first comparison.
min_valid_loss = float("inf")

for e in range(epochs):
    # ---- training ----
    # Re-enable dropout / batch-norm updates; the validation phase below
    # switches the model to eval mode at the end of every epoch.
    model.train()
    train_loss = 0.0
    for data, labels in tqdm(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(data)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # Average of the per-batch mean losses.
    train_loss /= len(train_loader)
    writer.add_scalar("Loss/Train", train_loss, e)
    print(f'Epoch {e+1} | Training Loss: {train_loss}')

    # ---- validation ----
    valid_loss = 0.0
    valid_correct = 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for data, labels in valid_loader:
            data, labels = data.to(device), labels.to(device)

            logits = model(data)
            loss = loss_fn(logits, labels)
            # Accumulate the summed loss of the batch (mean loss * batch size).
            valid_loss += loss.item() * data.size(0)

            predicted = logits.argmax(dim=1)
            valid_correct += (predicted == labels).sum().item()

    # Per-sample averages over the whole validation set.
    n_valid = len(valid_loader.dataset)
    valid_loss /= n_valid
    valid_acc = (valid_correct / n_valid) * 100
    writer.add_scalar("Loss/Val", valid_loss, e)
    writer.add_scalar("Acc/Val", valid_acc, e)

    print(f'Epoch {e+1} | Training Loss: {train_loss:.6f} | Validation Loss: {valid_loss:.6f} | Validation Accuracy: {valid_acc:.2f}%')

    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased ({min_valid_loss:.6f} ---> {valid_loss:.6f}) | Saving The Model')
        min_valid_loss = valid_loss

        if custom_crop:
            name = f"car_model_{valid_acc:.2f}_crop"
        else:
            name = f"car_model_{valid_acc:.2f}_nocrop"
        torch.save(model.state_dict(), os.path.join(model_dir, f"{name}.pth"))

writer.close()

  2%|▏         | 4/204 [04:23<3:01:31, 54.46s/it]

In [None]:
!pip install tensorboard
tensorboard --logdir="/content/drive/MyDrive/cars/runs"