<h3>Setting up Environment</h3>

In [None]:
cd /kaggle/input/mask-rcnn

In [None]:
!pip3 install -r requirements.txt

In [None]:
cp -r /kaggle/input/mask-rcnn/Mask_RCNN /kaggle/working/

In [None]:
cd /kaggle/working/Mask_RCNN/Mask_RCNN

In [None]:
# Show which TensorFlow version the kernel is using.
import tensorflow as tf
tf.__version__

In [None]:
# Install the package described by setup.py in the current directory.
!python setup.py install

<h3><center>1. Importing Libraries</center></h3>

In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import os

from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

from mrcnn.config import Config
from mrcnn.model import MaskRCNN

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

<h3><center>2. Configure Model</center></h3>

<div style="font-family:verdana; word-spacing:1.7px;">
    First, the model must be defined via an instance of the MaskRCNN class. This class requires a configuration object as a parameter. The configuration object defines how the model might be used during training or inference. In this case, the configuration will only specify the number of images per batch, which will be one, and the number of classes to predict. You can see the full extent of the configuration object and the properties that you can override in the config.py file.</div>

In [None]:
# define the test configuration
class TestConfig(Config):
    """Configuration used to run the pretrained model in inference mode."""
    NAME = "test"
    GPU_COUNT = 1
    # One image per batch: detect() is later called with a single-image list.
    IMAGES_PER_GPU = 1
    # presumably background + the 80 COCO categories of the pretrained weights — TODO confirm
    NUM_CLASSES = 1 + 80

# Build the network in inference mode with the configuration above.
rcnn = MaskRCNN(mode='inference', model_dir='/kaggle/working/Mask_RCNN/Mask_RCNN/',
                config=TestConfig())

In [None]:
def draw_image_with_boxes(filename, boxes_list):
    """Display an image file with every given box outlined in red.

    Args:
        filename: Path of the image to read with ``plt.imread``.
        boxes_list: Iterable of boxes, each unpacked as ``(y1, x1, y2, x2)``.
    """
    pixels = plt.imread(filename)
    plt.imshow(pixels)

    # Patches are added to the axes that imshow just drew on.
    axes = plt.gca()
    for top, left, bottom, right in boxes_list:
        # Rectangle takes the (x, y) anchor corner followed by width and height.
        outline = Rectangle((left, top), right - left, bottom - top,
                            fill=False, color='red')
        axes.add_patch(outline)

    plt.show()

<h3><center>3. Load Weights & Detect</center></h3>

<div style="font-family:verdana; word-spacing:1.7px;">
The next step is to load the weights. With the weights loaded, we can make a prediction for our image. <br><br>  Instead of calling predict() as we would on a normal Keras model, we will call the detect() function and pass it the single image.<br><br>
    The result contains a dictionary for each image that we passed into the detect() function. The keys of the dictionary of note are as follows:
    <br>
    <ul>
        <li>‘rois’: The bounding boxes or regions-of-interest (ROI) for detected objects.</li>
        <li>‘masks’: The masks for the detected objects.</li>
        <li>‘class_ids’: The class integers for the detected objects.</li>
        <li>‘scores’: The probability or confidence for each predicted class.</li>
    </ul>
</div>

In [None]:
# Pretrained weights file and the sample photo, both read from the Kaggle input dataset.
WEIGHTS = '/kaggle/input/mask-rcnn/mask_rcnn_coco.h5'
IMG_PATH = '/kaggle/input/mask-rcnn/elephant.jpg'

In [None]:
# Restore the pretrained weights, matching layers by name.
rcnn.load_weights(WEIGHTS, by_name=True)

# Read the sample photo and convert it straight to an array.
img = img_to_array(load_img(IMG_PATH))

# detect() is given a list of images; here it holds the single sample.
results = rcnn.detect([img], verbose=0)

In [None]:
# Outline the detected regions of interest of the first (only) result on the sample photo.
draw_image_with_boxes(IMG_PATH, results[0]['rois'])

<h3><center>1. Parse Annotation File</center></h3>

In [None]:
# NOTE(review): ANNOT_PATH names a single COCO-style .json file, yet the cells below
# derive per-image '<id>.xml' annotation paths from it, which cannot resolve.
# These are also Colab-style /content/drive paths, unlike the /kaggle paths used
# elsewhere in this notebook. Confirm the annotation format and location before running.
ANNOT_PATH = '/content/drive/MyDrive/Helmet_Detection.v1i.coco/_trainannotations.coco.json'
Helmet_PATH = '/content/drive/MyDrive/Helmet_Detection.v1i.coco/train'

In [None]:
import xml.dom.minidom

# Parse one annotation file and pretty-print it to inspect its structure.
dom = xml.dom.minidom.parse(ANNOT_PATH + '00001.xml')
pretty_xml_as_string = dom.toprettyxml()

print(pretty_xml_as_string)


<div style="font-family:verdana; word-spacing:1.7px;">
 We can see that the annotation file contains a size element that describes the shape of the photograph, and object elements describe the bounding boxes for the helmet objects in the image.   
    </div>

In [None]:
from xml.etree import ElementTree

def extract_boxes(filename):
    """Read the bounding boxes and image size from one XML annotation file.

    Args:
        filename: Annotation source handed to ``ElementTree.parse``.

    Returns:
        A ``(boxes, width, height)`` tuple. ``boxes`` holds one
        ``[xmin, ymin, xmax, ymax]`` list of ints per ``bndbox`` element;
        ``width`` and ``height`` are the ints stored under ``size``.
    """
    root = ElementTree.parse(filename).getroot()

    # One coordinate list per <bndbox>, wherever it sits in the document.
    boxes = [
        [
            int(node.find('xmin').text),
            int(node.find('ymin').text),
            int(node.find('xmax').text),
            int(node.find('ymax').text),
        ]
        for node in root.findall('.//bndbox')
    ]

    width = int(root.find('.//size/width').text)
    height = int(root.find('.//size/height').text)

    return boxes, width, height

# Sanity-check the parser on a single annotation file.
boxes, w, h = extract_boxes(ANNOT_PATH + '00001.xml')
print(boxes, w, h)

In [None]:
# Paths to the COCO format dataset
# NOTE(review): none of these four names is referenced by any other cell visible
# in this notebook — confirm whether they are still needed.
train_path = '/content/drive/MyDrive/Helmet_Detection.v1i.coco/train'
val_path = '/content/drive/MyDrive/Helmet_Detection.v1i.coco/valid'
train_annotation = '/content/drive/MyDrive/Helmet_Detection.v1i.coco/_trainannotations.coco.json'
val_annotation = '/content/drive/MyDrive/Helmet_Detection.v1i.coco/_validannotations.coco.json'

In [None]:
from mrcnn.utils import Dataset

class helmetDataset(Dataset):
    """Helmet detection dataset backed by one XML annotation file per image.

    Each image ``<id>.<ext>`` in the image directory is paired with
    ``<id>.xml`` in the annotation directory. The annotations are read for
    bounding boxes only, so every box is turned into a filled rectangular mask.
    """

    # Single foreground class. Used both when registering the class and when
    # labelling masks, so the two names cannot drift apart.
    CLASS_NAME = "helmet"

    def extract_boxes(self, filename):
        """Read the bounding boxes and image size from one XML annotation file.

        Args:
            filename: Annotation source handed to ``ElementTree.parse``.

        Returns:
            A ``(boxes, width, height)`` tuple. ``boxes`` holds one
            ``[xmin, ymin, xmax, ymax]`` list of ints per ``bndbox`` element;
            ``width`` and ``height`` are the ints stored under ``size``.
        """
        root = ElementTree.parse(filename).getroot()

        boxes = [
            [
                int(box.find('xmin').text),
                int(box.find('ymin').text),
                int(box.find('xmax').text),
                int(box.find('ymax').text),
            ]
            for box in root.findall('.//bndbox')
        ]

        width = int(root.find('.//size/width').text)
        height = int(root.find('.//size/height').text)

        return boxes, width, height

    def load_dataset(self, img_path, annot_path, is_train=True,
                     split_at=150, bad_ids=('00090',)):
        """Register the images of one split together with their annotation paths.

        Args:
            img_path: Directory containing the image files.
            annot_path: Directory containing the ``<id>.xml`` annotation files.
            is_train: When true keep ids below ``split_at``; otherwise keep
                ids greater than or equal to it.
            split_at: Numeric id at which the train/test split is made.
            bad_ids: Image ids (file stems) to leave out of both splits.

        Raises:
            ValueError: If a file stem in ``img_path`` is not an integer,
                since the split is decided on the numeric id.
        """
        self.add_class("dataset", 1, self.CLASS_NAME)

        # os.listdir order is arbitrary; sorting keeps image indices reproducible.
        for filename in sorted(os.listdir(img_path)):
            # splitext copes with extensions of any length (.jpg, .jpeg, ...).
            image_id = os.path.splitext(filename)[0]

            # skip bad images
            if image_id in bad_ids:
                continue

            in_train_split = int(image_id) < split_at
            if in_train_split != bool(is_train):
                continue

            # os.path.join works whether or not the directories end in a separator.
            img_p = os.path.join(img_path, filename)
            ann_p = os.path.join(annot_path, image_id + '.xml')

            self.add_image('dataset', image_id=image_id, path=img_p,
                           annotation=ann_p)

    def load_mask(self, image_id):
        """Build one rectangular mask per annotated box for the given image.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            A ``(masks, class_ids)`` tuple: ``masks`` is a uint8 array of shape
            ``[height, width, n_boxes]`` holding 1 inside each box, and
            ``class_ids`` is an int32 array with one entry per box.
        """
        info = self.image_info[image_id]
        boxes, w, h = self.extract_boxes(info['annotation'])

        masks = np.zeros([h, w, len(boxes)], dtype='uint8')
        for i, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            # Rows are indexed by y and columns by x.
            masks[ymin:ymax, xmin:xmax, i] = 1

        # presumably class_names is populated by prepare() — call it before load_mask
        class_id = self.class_names.index(self.CLASS_NAME)
        class_ids = np.full(len(boxes), class_id, dtype='int32')

        return masks, class_ids

    def image_reference(self, image_id):
        """Return the file path registered for the given image index."""
        info = self.image_info[image_id]
        return info['path']

In [None]:
# Build the training split. The class defined above is `helmetDataset`;
# `helmetDatasetDataset` is not defined anywhere and raised a NameError.
train_set = helmetDataset()

train_set.load_dataset(Helmet_PATH, ANNOT_PATH, is_train=True)

train_set.prepare()

print('Train: %d' % len(train_set.image_ids))

In [None]:
# Build the held-out split. The class defined above is `helmetDataset`;
# `helmetDatasetDataset` is not defined anywhere and raised a NameError.
test_set = helmetDataset()

test_set.load_dataset(Helmet_PATH, ANNOT_PATH, is_train=False)

test_set.prepare()

print('Test: %d' % len(test_set.image_ids))

In [None]:
# Load the first training image and its masks and report both array shapes.
image_id = 0

image = train_set.load_image(image_id)
print(image.shape)

mask, class_ids = train_set.load_mask(image_id)
print(mask.shape)

In [None]:
# Overlay the first instance mask, semi-transparent, on the loaded image.
# The calls are made as plain statements: only a cell's last expression is
# echoed, so assigning every result to `_` was unnecessary and overwrote
# IPython's last-output variable.
plt.figure(figsize=(15, 8))
plt.imshow(image)
plt.imshow(mask[:, :, 0], cmap='gray', alpha=0.5)
plt.show()

In [None]:
# Show the first nine training images in a 3x3 grid with every instance mask overlaid.
plt.figure(figsize=(20, 15))
for i in range(9):
    # Row-major position i + 1 in a 3-row, 3-column layout.
    plt.subplot(3, 3, i + 1)
    plt.axis('off')

    image = train_set.load_image(i)
    mask, _ = train_set.load_mask(i)

    plt.imshow(image)
    # The last mask axis indexes the individual instances.
    for j in range(mask.shape[2]):
        plt.imshow(mask[:, :, j], cmap='gray', alpha=0.3)

plt.show()

<h4>Debugging</h4>

In [None]:
# Debug aid: print the registered metadata record of every training image.
for image_id in train_set.image_ids:

    info = train_set.image_info[image_id]

    print(info)

<h3><center>4. Extract Boundary Boxes</center></h3>

In [None]:
from mrcnn.visualize import display_instances
from mrcnn.utils import extract_bboxes

# Pick one training image to visualise.
image_id = 15

image = train_set.load_image(image_id)

mask, class_ids = train_set.load_mask(image_id)

# Derive the boxes from the masks rather than re-reading the annotation file.
bbox = extract_bboxes(mask)

display_instances(image, bbox, mask, class_ids, train_set.class_names)

<h3><center>5. Configuration for training</center></h3>
<div style="font-family:verdana; word-spacing:1.7px;">
The pre-defined model architecture and weights can be loaded by calling load_weights().<br><br>
The class-specific output layers are left out using the exclude argument.<br><br>
We can also specify which layers to train. In this case, we will only train the heads, that is, the output layers of the model.
    </div>

In [None]:
from mrcnn.config import Config

class helmetconfig(Config):
    """Training configuration for the helmet detector."""

    NAME = "helmet_cfg"
    # presumably background + the single helmet class — TODO confirm
    NUM_CLASSES = 1 + 1

    GPU_COUNT = 1
    IMAGES_PER_GPU = 3

    TRAIN_ROIS_PER_IMAGE = 32
    MAX_GT_INSTANCES = 7
    DETECTION_MAX_INSTANCES = 7

    # NOTE(review): 131 looks tied to a specific training-set size — confirm it suits the current dataset.
    STEPS_PER_EPOCH = 131

In [None]:
# Instantiate the training configuration and print its settings.
config = helmetconfig()
config.display()

# Build the network in training mode.
model = MaskRCNN(mode='training', model_dir='/kaggle/working/Mask_RCNN/Mask_RCNN/',
                 config=config)

# load weights (mscoco) and exclude the output layers
model.load_weights('/kaggle/input/mask-rcnn/mask_rcnn_coco.h5', by_name=True,
                   exclude=["mrcnn_class_logits","mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])

<h3><center>5. Training Output layers</center></h3>
<div style="font-family:verdana; word-spacing:1.7px;">
Only the head (output) layers of the model are trained in this step.
</div>

In [None]:
# train weights (output layers or 'heads')
model.train(train_set, test_set, learning_rate=config.LEARNING_RATE,
            epochs=1, layers='heads')

<h3><center>6. Evaluate Model</center></h3>

<div style="font-family:verdana; word-spacing:1.7px;">
The performance of a model for an object recognition task is often evaluated using the mean average precision, or mAP. We are predicting bounding boxes, so we can determine whether a bounding box prediction is good or not based on how well the predicted and actual bounding boxes overlap. This can be calculated by dividing the area of the overlap by the combined area covered by the two bounding boxes, that is, the intersection divided by the union, referred to as intersection over union, or IoU. A perfect bounding box prediction will have an IoU of 1. It is standard to assume a positive prediction of a bounding box if the IoU is greater than 0.5, e.g. they overlap by 50% or more. Precision refers to the percentage of the correctly predicted bounding boxes (IoU > 0.5) out of all bounding boxes predicted. Recall is the percentage of the correctly predicted bounding boxes (IoU > 0.5) out of all objects in the photo.<br><br>
    The average or mean of the average precision (AP) across all of the images in a dataset is called the mean average precision, or mAP. The mask-rcnn library provides mrcnn.utils.compute_ap to calculate the AP and other metrics for a given image.
    </div>

In [None]:
# NOTE(review): the directory name embeds what looks like a run timestamp (20210224T0711),
# so this path presumably has to be updated after each new training run — confirm.
WEIGHTS_PATH = '/kaggle/working/Mask_RCNN/Mask_RCNN/helmet_cfg20210224T0711/'

In [None]:
from mrcnn.utils import compute_ap
from mrcnn.model import load_image_gt
from mrcnn.model import mold_image

class PredictionConfig(Config):
    """Inference configuration used when evaluating the trained helmet model."""
    NAME = "helmet_cfg"

    # presumably background + the single helmet class — TODO confirm
    NUM_CLASSES = 1 + 1
    GPU_COUNT = 1
    # One image per batch: the evaluation code feeds detect() one image at a time.
    IMAGES_PER_GPU = 1

def evaluate_model(dataset, model, cfg):
    """Compute the mean average precision (mAP) of a model over a dataset.

    Args:
        dataset: Dataset exposing ``image_ids``; handed to ``load_image_gt``.
        model: Inference-mode model exposing ``detect``.
        cfg: Configuration handed to ``load_image_gt``.

    Returns:
        The mean of the per-image AP values returned by ``compute_ap``, or
        NaN when the dataset contains no images.
    """
    APs = []
    for image_id in dataset.image_ids:
        # load image, bounding boxes and masks for the image id
        image, _, gt_class_id, gt_bbox, gt_mask = load_image_gt(
            dataset, cfg, image_id, use_mini_mask=False)

        # detect() takes a list of raw images and applies its own resizing and
        # pixel normalisation. Passing an image that was already run through
        # mold_image() normalised it twice, so the raw image is passed here,
        # matching the detect([img]) call used for the sample photo.
        r = model.detect([image], verbose=0)[0]

        AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask,
                                 r["rois"], r["class_ids"], r["scores"], r["masks"])
        APs.append(AP)

    # Same value np.mean([]) would give, without the empty-slice warning.
    if not APs:
        return float('nan')

    return np.mean(APs)

In [None]:
cfg = PredictionConfig()

# Rebuild the network in inference mode; this rebinds `model`, replacing the training-mode instance.
model = MaskRCNN(mode='inference', model_dir='/kaggle/working/Mask_RCNN/Mask_RCNN/',
                 config=cfg)

# NOTE(review): confirm this checkpoint file name matches what the training run actually wrote.
model.load_weights(WEIGHTS_PATH+'mask_rcnn_helmet_cfg_0000.h5', by_name=True)

train_mAP = evaluate_model(train_set, model, cfg)

print("Train mAP: %.3f" % train_mAP)

test_mAP = evaluate_model(test_set, model, cfg)

print("Test mAP: %.3f" % test_mAP)

In [None]:
from mrcnn.model import mold_image

def plot_actual_vs_predicted(dataset, model, cfg, n_images=5):
    """Plot ground-truth masks next to predicted boxes for the first images.

    One figure is drawn with a row per image: the left column shows the image
    with its ground-truth masks overlaid, the right column shows the same
    image with the predicted boxes outlined in red.

    Args:
        dataset: Dataset exposing ``image_ids``, ``load_image`` and ``load_mask``.
        model: Inference-mode model exposing ``detect``.
        cfg: Unused; kept so existing calls keep working.
        n_images: Maximum number of images to plot, starting at index 0.
    """
    # Never index past the end of the dataset.
    n_images = min(n_images, len(dataset.image_ids))
    if n_images <= 0:
        return

    # A single grid created once. Creating a figure and calling show() inside
    # the loop produced one mostly empty figure per image.
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    _, axes = plt.subplots(n_images, 2, figsize=(20, 15), squeeze=False)
    axes[0, 0].set_title('Actual')
    axes[0, 1].set_title('Predicted')

    for i in range(n_images):
        image = dataset.load_image(i)
        mask, _ = dataset.load_mask(i)

        # detect() takes a list of raw images and applies its own pixel
        # normalisation, so the image is not run through mold_image() first.
        yhat = model.detect([image], verbose=0)[0]

        actual_ax, predicted_ax = axes[i]

        actual_ax.axis('off')
        actual_ax.imshow(image)
        for j in range(mask.shape[2]):
            actual_ax.imshow(mask[:, :, j], cmap='gray', alpha=0.3)

        predicted_ax.axis('off')
        predicted_ax.imshow(image)
        for y1, x1, y2, x2 in yhat['rois']:
            # Rectangle takes the (x, y) anchor corner followed by width and height.
            predicted_ax.add_patch(
                Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, color='red'))

    plt.show()

In [None]:
# Compare ground truth with predictions on the first training images.
plot_actual_vs_predicted(train_set, model, cfg)