# Mask RCNN Example

Using Matterport's Mask R-CNN implementation for Keras/TensorFlow: https://github.com/matterport/Mask_RCNN

Based on https://towardsdatascience.com/object-detection-using-mask-r-cnn-on-a-custom-dataset-4f79ab692f6d


## Environment Setup

In [None]:
# Pin the exact TensorFlow / Keras versions this notebook is run with.
# NOTE(review): both `tensorflow-gpu` and `tensorflow` are installed at the same
# version — presumably only one of the two builds is needed; confirm which one
# is expected to be active after both installs.
!pip install tensorflow-gpu==1.13.1
!pip install tensorflow==1.13.1
!pip install keras==2.0.8
# imutils is imported in the "Imports" cell of this notebook.
!pip install imutils

In [None]:
# Check which TensorFlow version is importable in this kernel.
import warnings
# Silence FutureWarning messages; the filter is installed before TensorFlow is imported.
warnings.simplefilter(action='ignore', category=FutureWarning)

import tensorflow as tf
# Bare expression on the last line so the notebook displays the version string.
tf.__version__

In [None]:
# Clone the Matterport Mask_RCNN repository into ./Mask_RCNN
# (the next cell changes into that directory and installs it).
!git clone https://github.com/matterport/Mask_RCNN.git

In [None]:
# Install the cloned Matterport package and download the COCO-pretrained weights.
import os
# NOTE: this changes the kernel's working directory, so the two shell commands
# below run inside ./Mask_RCNN and the weights file is saved there.
# Re-running this cell without changing back first would look for
# ./Mask_RCNN inside ./Mask_RCNN.
os.chdir("./Mask_RCNN")
!python setup.py install
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5

## Imports

In [None]:
import mrcnn
from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import visualize
from mrcnn.utils import Dataset
from mrcnn.model import MaskRCNN
from mrcnn import utils
import numpy as np
from numpy import zeros
from numpy import asarray
import colorsys
import argparse
import imutils
import random
import cv2
import os
from os import listdir
import time
from matplotlib import pyplot
from matplotlib.patches import Rectangle
%matplotlib inline
from keras.models import load_model
from xml.etree import ElementTree

In [None]:
# Matterport configuration overrides for the kangaroo model
class myMaskRCNNConfig(Config):
    """Configuration shared by the training and inference models in this notebook.

    Only the attributes assigned here are overridden; every other setting is
    inherited unchanged from ``Config``.

    The following overrides are deliberately left disabled (inherited values
    apply): IMAGE_MIN_DIM / IMAGE_MAX_DIM = 128, TRAIN_ROIS_PER_IMAGE = 20,
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128), and STEPS_PER_EPOCH /
    VALIDATION_STEPS derived from the dataset sizes.
    """

    # Recognizable name for this configuration
    NAME = "MaskRCNN_config"

    # One GPU, one image per GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # One foreground class (kangaroo) plus the background class
    NUM_CLASSES = 1 + 1

    # Training steps per epoch
    STEPS_PER_EPOCH = 131

    # Base learning rate
    LEARNING_RATE = 0.006

    # Detections below 90% confidence are skipped
    DETECTION_MIN_CONFIDENCE = 0.9

    # Maximum number of ground-truth instances per image
    MAX_GT_INSTANCES = 10


# Instantiate the configuration once and print its values
config = myMaskRCNNConfig()
config.display()

## Loading a custom Dataset

In [None]:
class KangarooDataset(Dataset):
    """Kangaroo dataset backed by XML bounding-box annotations.

    Images are read from ``<dataset_dir>/images/`` and one XML file per image
    from ``<dataset_dir>/annotations/``. Every annotated box becomes one
    rectangular instance mask of the single class ``kangaroo``.
    """

    # Image ids that are never registered (known-bad images).
    BAD_IMAGE_IDS = ('00090',)
    # Training split: numeric ids strictly below this value.
    TRAIN_ID_LIMIT = 80
    # Test/validation split: numeric ids from this value upwards.
    # NOTE(review): ids in [80, 690) end up in neither split — confirm intended.
    VAL_ID_START = 690

    def load_dataset(self, dataset_dir, is_train=True):
        """Register the ``kangaroo`` class and the images of one split.

        Args:
            dataset_dir: Directory containing the ``images/`` and
                ``annotations/`` sub-directories.
            is_train: When true, register the training split (numeric id
                below ``TRAIN_ID_LIMIT``); otherwise register the test/val
                split (numeric id at or above ``VAL_ID_START``).

        File names are expected to look like ``<prefix>-<number>.<ext>``.
        Files that do not match are skipped.
        """
        # Add classes. We have only one class to add.
        self.add_class("dataset", 1, "kangaroo")

        # define data locations for images and annotations
        images_dir = dataset_dir + '/images/'
        annotations_dir = dataset_dir + '/annotations/'

        # listdir() order is arbitrary; sort so that the index assigned to
        # each image is reproducible between runs.
        for filename in sorted(listdir(images_dir)):
            # The image id is the text between the first '-' and the next '.'.
            # Stray files (hidden files, checkpoints, ...) are ignored instead
            # of aborting the whole load.
            try:
                image_id = filename.split('-')[1].split('.')[0]
                number = int(image_id)
            except (IndexError, ValueError):
                continue

            # skip bad images
            if image_id in self.BAD_IMAGE_IDS:
                continue
            # skip images that do not belong to the requested split
            if is_train and number >= self.TRAIN_ID_LIMIT:
                continue
            if not is_train and number < self.VAL_ID_START:
                continue

            img_path = images_dir + filename
            # Same base name with an .xml extension, whatever the length of
            # the image extension (.jpg, .jpeg, .png, ...).
            ann_path = annotations_dir + filename.rsplit('.', 1)[0] + '.xml'

            # adding images and annotations to dataset
            self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path)

    def extract_boxes(self, filename):
        """Extract bounding boxes and image size from an annotation file.

        Args:
            filename: Path of the XML annotation file.

        Returns:
            A tuple ``(boxes, width, height)`` where ``boxes`` is a list of
            ``[xmin, ymin, xmax, ymax]`` integer lists, one per ``bndbox``
            element, and ``width`` / ``height`` come from the ``size`` element.
        """
        root = ElementTree.parse(filename).getroot()
        boxes = [
            [int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            for box in root.findall('.//bndbox')
        ]
        width = int(root.find('.//size/width').text)
        height = int(root.find('.//size/height').text)
        return boxes, width, height

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Each annotated bounding box is filled in as a rectangular mask.

        Returns:
            masks: A uint8 (0/1) array of shape [height, width, instance count]
                with one mask per instance.
            class_ids: a 1D int32 array of class IDs of the instance masks.
        """
        # get details of image
        info = self.image_info[image_id]

        # load the boxes and image size from the annotation file
        boxes, w, h = self.extract_boxes(info['annotation'])

        # create one array for all masks, each on a different channel
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        for i, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            masks[ymin:ymax, xmin:xmax, i] = 1

        # every instance belongs to the single 'kangaroo' class; the lookup
        # is skipped when the image has no boxes
        class_ids = [self.class_names.index('kangaroo')] * len(boxes) if boxes else []
        return masks, asarray(class_ids, dtype='int32')

    def image_reference(self, image_id):
        """Return the path of the image."""
        return self.image_info[image_id]['path']

In [None]:
# An earlier cell changed into ./Mask_RCNN; step back up to its parent directory
# so the relative paths used from here on resolve from there.
os.chdir("../")
dataset_dir = "../input/kangaroodataset"
# prepare train set
train_set = KangarooDataset()
train_set.load_dataset(dataset_dir, is_train=True)
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
# prepare test/val set
test_set = KangarooDataset()
test_set.load_dataset(dataset_dir, is_train=False)
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))

In [None]:
# Display the metadata entry stored for the first training image.
train_set.image_info[0]

In [None]:
# Look at one training example: the image with its ground-truth masks
image_id = 0
image = train_set.load_image(image_id)

mask, class_ids = train_set.load_mask(image_id)
# boxes are recomputed from the masks rather than read from the annotation file
bbox = utils.extract_bboxes(mask)
visualize.display_instances(image,bbox,mask, class_ids, train_set.class_names)

In [None]:
# Display the metadata entry stored for the second test/val image.
test_set.image_info[1]

In [None]:
# Look at one test/val example: the image with its ground-truth masks
image_id = 1
image = test_set.load_image(image_id)

mask, class_ids = test_set.load_mask(image_id)
# boxes are recomputed from the masks rather than read from the annotation file
bbox = utils.extract_bboxes(mask)
visualize.display_instances(image,bbox,mask, class_ids, test_set.class_names)

## Training the model

In [None]:
# Build the model in training mode; './Mask_RCNN' is passed as its model directory.
print("Loading Mask R-CNN model...")
model = modellib.MaskRCNN(mode="training", config=config, model_dir='./Mask_RCNN')
# Load the COCO weights downloaded earlier, matching layers by name.
# The layers listed in `exclude` are not loaded — presumably because their
# shapes depend on NUM_CLASSES, which differs from COCO; confirm.
model.load_weights('./Mask_RCNN/mask_rcnn_coco.h5', 
                   by_name=True, 
                   exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"])

In [None]:
# Train only the 'heads' layers for one epoch at twice the configured learning
# rate, using test_set as the validation dataset.
model.train(train_set, test_set, learning_rate=2*config.LEARNING_RATE, epochs=1, layers='heads')
# Keep the Keras training history (per-epoch metrics) for later inspection.
history = model.keras_model.history.history

## Evaluation

In [None]:
# Best model
# NOTE(review): hard-coded checkpoint path containing a run timestamp
# (20210604T1551) — presumably it has to be updated to the directory written
# by the training cell of the current run; confirm before executing.
model_path = './Mask_RCNN/maskrcnn_config20210604T1551/mask_rcnn_maskrcnn_config_0000.h5'
# Rebuild the model in inference mode (this rebinds `model`, replacing the training model)
model = modellib.MaskRCNN(mode="inference", config=config, model_dir='./')
# Load the weights trained on the custom dataset
model.load_weights(model_path, by_name=True)

In [None]:
# Testing on a sample: compare the prediction for one test image with its ground truth
image_id = 1
image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
    test_set, config, image_id, use_mini_mask=False)
info = test_set.image_info[image_id]
print("image ID: {}.{} ({}) {}".format(info["source"], info["id"], image_id,
                                       test_set.image_reference(image_id)))
# make prediction
# model.detect() takes a list of raw images and performs the resizing and
# mean-pixel subtraction itself, so the image must NOT be passed through
# modellib.mold_image() first (that would subtract the mean pixel twice).
yhat = model.detect([image], verbose=0)
# extract results for first sample
r = yhat[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                            test_set.class_names, r['scores'],
                            title="Predictions")

In [None]:
# Calculate statistics, including AP, for the single sample predicted in the
# previous cell (reads gt_bbox, gt_class_id, gt_mask and r from the notebook namespace).
AP, _, _, _ = utils.compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
print(AP)

In [None]:
def evaluate_model(dataset, model, cfg):
    """Return the mean average precision of ``model`` over ``dataset``.

    Args:
        dataset: Prepared dataset whose ``image_ids`` are evaluated one by one.
        model: Mask R-CNN model in inference mode.
        cfg: Configuration used to load each image and its ground truth.

    Returns:
        The mean of the per-image AP values (``nan`` when the dataset has no
        images, as produced by ``np.mean`` of an empty list).
    """
    APs = []
    for image_id in dataset.image_ids:
        # load image, bounding boxes and masks for the image id
        image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
            dataset, cfg, image_id, use_mini_mask=False)
        # model.detect() takes a list of raw images and performs the resizing
        # and mean-pixel subtraction itself; pre-molding the image here would
        # subtract the mean pixel twice.
        r = model.detect([image], verbose=0)[0]
        # calculate statistics, including AP
        AP, _, _, _ = utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                                       r["rois"], r["class_ids"], r["scores"], r['masks'])
        APs.append(AP)
    # calculate the mean AP across all images
    return np.mean(APs)

In [None]:
# Evaluate the inference model on the test dataset and report the mean AP
# to three decimal places.
test_mAP = evaluate_model(test_set, model, config)
print("Test mAP: %.3f" % test_mAP)

In [None]:
# Viewing some samples: ground truth (left column) next to predictions (right column)
num_samples = 4
fig, axes = pyplot.subplots(num_samples, 2, figsize=(15, 7 * num_samples))
# Index the axes by row position rather than by image id, and take at most
# num_samples images instead of counting down manually.
for row, image_id in enumerate(test_set.image_ids[:num_samples]):
    image = test_set.load_image(image_id)
    mask, class_ids = test_set.load_mask(image_id)
    bbox = utils.extract_bboxes(mask)
    visualize.display_instances(image, bbox, mask, class_ids, test_set.class_names,
                                ax=axes[row][0], title="Truth")
    # Predict. model.detect() takes a list of raw images and performs the
    # resizing and mean-pixel subtraction itself; pre-molding the image would
    # subtract the mean pixel twice.
    r = model.detect([image], verbose=0)[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                test_set.class_names, r['scores'], ax=axes[row][1],
                                title="Predictions")