# References

I followed this blog post on object detection with Mask R-CNN and applied the same approach to this car dataset.
Link: https://machinelearningmastery.com/how-to-train-an-object-detection-model-with-keras/

# Importing and Installing libraries

The code uses the following fork of Mask R-CNN, which supports TensorFlow 2+.

In [None]:
# Fetch the Mask R-CNN fork and install it as a package in this environment.
!git clone https://github.com/leekunhee/Mask_RCNN.git
!cd Mask_RCNN && python setup.py install

In [None]:
import os,sys
import pandas as pd
import numpy as np
from os import listdir
from numpy import zeros, asarray, expand_dims, mean
from matplotlib import pyplot

ROOT_DIR = os.path.abspath("./Mask_RCNN")
sys.path.append(ROOT_DIR) 

from mrcnn.utils import Dataset,extract_bboxes
from mrcnn.visualize import display_instances
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
from mrcnn.utils import compute_ap
from mrcnn.model import load_image_gt
from mrcnn.model import mold_image

import warnings
warnings.filterwarnings("ignore")

# Dataset

First we will read from the csv and try to understand the format of data.

In [None]:
# Bounding-box annotations for the training images.
boxes_csv = '../input/car-object-detection/data/train_solution_bounding_boxes (1).csv'
bb_df = pd.read_csv(boxes_csv)

In [None]:
bb_df.head(n=5)  # preview the first five rows

In [None]:
bb_df.describe()  # summary statistics (count, mean, spread, extremes) of the numeric columns

In [None]:
bb_df.nunique()  # number of distinct values in each column

As you can see above, the number of unique image names is smaller than the total number of rows. This means some images appear in several rows: an image that contains more than one car has one row per car, each row giving one bounding box.

In [None]:
class CarsDataset(Dataset):
    '''
    Dataset class to load the car images and their bounding boxes in the form of masks.

    Annotations are read from the module-level ``bb_df`` DataFrame, whose
    columns are used by position as: image name, xmin, ymin, xmax, ymax.
    '''
    # Every image is treated as 380 rows by 676 columns; the size is not read
    # from the image files (assumes all images share this size - TODO confirm).
    IMAGE_ROWS = 380
    IMAGE_COLS = 676
    # Annotation rows before this index feed the training split, the rest validation.
    TRAIN_ROWS = 500

    def load_dataset(self, dataset_dir='../input/car-object-detection/data', mode='train'):
        '''
        Register the images of one split with the dataset.

        dataset_dir: folder containing ``training_images/`` and ``testing_images/``.
        mode: 'train' uses the images named in the first TRAIN_ROWS annotation rows,
              'val' uses the images named in the remaining rows, and
              'test' uses every file in ``testing_images/`` (no labels available,
              useful for visually checking how effective the training was).

        Each image is registered once even when it has several annotation rows,
        and an image already assigned to training is never added to validation.

        Raises ValueError for any other mode.
        '''
        self.add_class('dataset', 1, 'car')
        if mode in ('train', 'val'):
            images_dir = dataset_dir + '/training_images/'
            names = bb_df.iloc[:, 0]
            # dict.fromkeys removes repeated names while keeping first-seen order.
            train_names = list(dict.fromkeys(names.iloc[:self.TRAIN_ROWS]))
            if mode == 'train':
                image_names = train_names
            else:
                in_train = set(train_names)
                image_names = [
                    name for name in dict.fromkeys(names.iloc[self.TRAIN_ROWS:])
                    if name not in in_train
                ]
        elif mode == 'test':
            images_dir = dataset_dir + '/testing_images/'
            # Sorted so the image order does not depend on the file system.
            image_names = sorted(listdir(images_dir))
        else:
            raise ValueError("mode must be 'train', 'val' or 'test', got %r" % (mode,))

        for image_name in image_names:
            self.add_image('dataset', image_id=image_name, path=images_dir + image_name)

    def extract_boxes(self, filename):
        '''
        To get the coordinates of all bounding boxes of one image.

        Returns (boxes, rows, cols): ``boxes`` is a list of
        [xmin, ymin, xmax, ymax] integer lists, one per annotation row of
        ``filename`` (empty when the image has no annotations); ``rows`` and
        ``cols`` are the image size, in the order used for the mask array.
        '''
        rows_for_image = bb_df[bb_df['image'] == filename]
        boxes = [
            [int(value) for value in coords]
            for coords in rows_for_image.iloc[:, 1:5].values.tolist()
        ]
        return boxes, self.IMAGE_ROWS, self.IMAGE_COLS

    def load_mask(self, image_id):
        '''
        Takes the box co-ordinates of an image and turns each box into a filled rectangular mask.

        Returns (masks, class_ids): ``masks`` is a uint8 array of shape
        [rows, cols, number_of_boxes] and ``class_ids`` an int32 array with the
        'car' class id repeated once per box.
        '''
        info = self.image_info[image_id]
        boxes, n_rows, n_cols = self.extract_boxes(info['id'])
        masks = zeros([n_rows, n_cols, len(boxes)], dtype='uint8')
        car_class = self.class_names.index('car')
        for channel, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            # Clamp at 0 so a negative coordinate cannot wrap around to the far edge.
            masks[max(ymin, 0):max(ymax, 0), max(xmin, 0):max(xmax, 0), channel] = 1
        class_ids = [car_class] * len(boxes)
        return masks, asarray(class_ids, dtype='int32')

    def image_reference(self, image_id):
        '''Return the file path that was registered for the image.'''
        info = self.image_info[image_id]
        return info['path']

In [None]:
#Loading all the datasets we will need.
def _load_split(mode):
    '''Create a CarsDataset for the given mode and prepare it for use.'''
    split = CarsDataset()
    split.load_dataset(mode=mode)
    split.prepare()
    return split


# Training images with labels.
train_set = _load_split('train')
print('Train: %d' % len(train_set.image_ids))

# Held-out labelled images.
val_set = _load_split('val')
print('Validate: %d' % len(val_set.image_ids))

# Unlabelled images, only for visual inspection.
test_set = _load_split('test')
print('Test: %d' % len(test_set.image_ids))

Here we will visualise some of the images from the dataset alongside their masks.

In [None]:
def plot(num_img=5):
    '''
    Show randomly chosen training images with their box masks overlaid.

    num_img: how many images to draw. Images are picked at random from the
    module-level ``train_set``, so the same image can appear more than once.
    '''
    for _ in range(num_img):
        image_id = np.random.randint(0, len(train_set.image_ids))
        image = train_set.load_image(image_id)
        mask, _class_ids = train_set.load_mask(image_id)
        pyplot.imshow(image)
        # Overlay every mask channel so all annotated objects are visible.
        for channel in range(mask.shape[2]):
            pyplot.imshow(mask[:, :, channel], cmap='gray', alpha=0.3)
        pyplot.show()

In [None]:
# Preview five random training images (the default num_img).
plot()

# Training

This configuration class holds the most important parameters for model training.

In [None]:
class CarsConfig(Config):
    '''Training settings for the car detector; attributes not set here keep the Config defaults.'''
    NAME = "cars_cfg"

    # Background is counted as a class too, so background + car = 2 labels.
    NUM_CLASSES = 1 + 1

    # Input image size bounds.
    IMAGE_MIN_DIM = 384
    IMAGE_MAX_DIM = 448

    # Batch size per GPU.
    IMAGES_PER_GPU = 1

    # Length of one training epoch and of each validation pass.
    STEPS_PER_EPOCH = 200
    VALIDATION_STEPS = 20


config = CarsConfig()

In [None]:
config.display()  # print the full list of configuration values

In [None]:
# Build the network in training mode, using the current directory as model_dir.
model = MaskRCNN(config=config, mode='training', model_dir='./')

We are going to load pre-trained weights for this task. This saves a lot of time, because these models can take very long to converge when trained from scratch.

In [None]:
# Start from the pre-trained weights, leaving out the layers named below.
coco_weights = '../input/mask-rcnn-coco-weights/mask_rcnn_coco.h5'
skipped_layers = ["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"]
model.load_weights(coco_weights, by_name=True, exclude=skipped_layers)

In [None]:
# Train all layers for 10 epochs at the configured learning rate.
model.train(
    train_set,
    val_set,
    learning_rate=config.LEARNING_RATE,
    epochs=10,
    layers='all',
)

# Evaluation

We need to define a separate configuration class for prediction.

In [None]:
class PredictionConfig(Config):
    '''Inference settings; attributes not set here keep the Config defaults.'''
    NAME = "cars_cfg"
    # Background + car.
    NUM_CLASSES = 2
    # One image at a time on a single GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    USE_MINI_MASK = False
    # Same image size bounds as the training configuration, so images are
    # resized for inference the same way they were resized for training.
    IMAGE_MIN_DIM = 384
    IMAGE_MAX_DIM = 448

In [None]:
# Fresh model instance in inference mode, driven by the prediction settings.
cfg = PredictionConfig()
model = MaskRCNN(config=cfg, mode='inference', model_dir='./')

Loading the saved weights to perform inference.

In [None]:
# Find a run directory (name starting with 'cars') in the current directory
# that contains the epoch-10 checkpoint, and load that checkpoint.
checkpoint_name = '/mask_rcnn_cars_cfg_0010.h5'
run_dirs = sorted(
    entry for entry in listdir()
    if entry[:4] == 'cars' and os.path.isfile('./' + entry + checkpoint_name)
)
if not run_dirs:
    raise FileNotFoundError(
        "No directory starting with 'cars' containing "
        + checkpoint_name[1:] + " was found in the current directory"
    )
# With several runs present, take the last name in sorted order
# (assumes the directory suffix sorts chronologically - TODO confirm).
path = run_dirs[-1]
model.load_weights('./' + path + checkpoint_name, by_name=True)

Here we will calculate the mean average precision (mAP) of our model. For a detailed explanation of what this metric means, see this blog post:
https://towardsdatascience.com/map-mean-average-precision-might-confuse-you-5956f1bfa9e2

In [None]:
def evaluate_model(dataset, model, cfg):
    '''
    Mean average precision of ``model`` over every image in ``dataset``.

    dataset: prepared dataset providing ``image_ids`` and ground truth.
    model: Mask R-CNN model in inference mode.
    cfg: configuration passed to ``load_image_gt`` when loading ground truth.

    Returns the mean of the per-image AP values (NaN for an empty dataset).
    '''
    APs = []
    for image_id in dataset.image_ids:
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id)
        # detect() prepares (resizes and normalises) its inputs itself, so the
        # image is passed as loaded; molding it here as well would apply the
        # normalisation twice.
        r = model.detect([image], verbose=0)[0]
        AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
        APs.append(AP)
    if not APs:
        return float('nan')
    return mean(APs)

In [None]:
# Score the model on the training split.
train_mAP = evaluate_model(dataset=train_set, model=model, cfg=cfg)
print("Train mAP: %.3f" % train_mAP)

# Score the model on the validation split.
val_mAP = evaluate_model(dataset=val_set, model=model, cfg=cfg)
print("Validation mAP: %.3f" % val_mAP)

# Actual vs Predicted

Finally, we will judge the model's performance visually by comparing the cars it detects with the real bounding boxes.

In [None]:
def plot_actual_vs_predicted(dataset, model, cfg, n_images=5):
    '''
    Draw ground truth next to predictions for randomly chosen images.

    Each row of the figure shows one image twice: on the left with its
    ground-truth masks overlaid, on the right with the predicted boxes in red.

    dataset: prepared dataset to sample images from (sampling may repeat an image).
    model: Mask R-CNN model in inference mode.
    cfg: kept for compatibility with existing callers; not used.
    n_images: number of rows to draw.
    '''
    # Imported here because the file imports Rectangle only after this definition.
    from matplotlib.patches import Rectangle

    # A single figure holds the whole n_images x 2 grid.
    pyplot.figure(figsize=(50, 50))
    for i in range(n_images):
        image_id = np.random.randint(0, len(dataset.image_ids))
        image = dataset.load_image(image_id)
        mask, _ = dataset.load_mask(image_id)
        # detect() prepares its inputs itself, so the raw image is passed in;
        # molding it here as well would apply the normalisation twice.
        yhat = model.detect([image], verbose=0)[0]

        # Left column: ground truth.
        pyplot.subplot(n_images, 2, i * 2 + 1)
        pyplot.imshow(image)
        pyplot.title('Actual')
        for j in range(mask.shape[2]):
            pyplot.imshow(mask[:, :, j], cmap='gray', alpha=0.3)

        # Right column: predicted boxes.
        pyplot.subplot(n_images, 2, i * 2 + 2)
        pyplot.imshow(image)
        pyplot.title('Predicted')
        ax = pyplot.gca()
        for box in yhat['rois']:
            y1, x1, y2, x2 = box
            width, height = x2 - x1, y2 - y1
            rect = Rectangle((x1, y1), width, height, fill=False, color='red')
            ax.add_patch(rect)
    pyplot.show()

In [None]:
# Compare ground truth and predictions on five random validation images (the default n_images).
from matplotlib.patches import Rectangle
plot_actual_vs_predicted(val_set, model, cfg)

Thank you for your time, and don't forget to upvote this notebook if you found it helpful.