<a href="https://colab.research.google.com/github/nhdchicken/nhd-colab/blob/master/notebooks/incubation/buttons-mask-rcnn/buttonsMaskRcnnTrainTF2_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mask R-CNN Demo on Buttons dataset using TF 2.2.

A quick introduction to fine-tuning a pre-trained Mask R-CNN model to detect UI buttons.

This experiment is run on Colab, so the nhdchicken drive must be mounted first.

In [None]:
%%bash
# Locate (or clone) the nhd-colab repository and install its nhdcolab helper.
# Every branch must leave the shell inside the repository root, because the
# pip install below uses a path relative to it.
if [ -d "/content/nhd-colab" ]
then
    echo "Environment already initialized"
    # Re-runs land here: the clone already exists, so just enter it.
    cd /content/nhd-colab || exit 1
elif [ -d "/content" ]
then
    cd /content || exit 1
    echo "Installing https://github.com/nhdchicken/nhd-colab.git"
    git clone --recurse-submodules https://github.com/nhdchicken/nhd-colab.git || exit 1
    cd nhd-colab || exit 1
else
    echo "Not running in Colab - going to root of repos"
    # Check git's own exit status: an empty result would otherwise turn the
    # cd into a silent no-op.
    repo_root="$(git rev-parse --show-toplevel)" || exit 1
    cd "$repo_root" || exit 1
fi
pwd
# Output stays suppressed as before, but a failure now says which step broke.
pip install utils/nhdcolab/ > /dev/null 2>&1 || { echo "pip install of utils/nhdcolab/ failed" >&2; exit 1; }
nhdcolab > /dev/null 2>&1 || { echo "nhdcolab command failed after install" >&2; exit 1; }
echo "Great Success!"

In [None]:
# Run the `init` sub-command of the nhdcolab CLI for the "mp-mask-rcnn" target.
# NOTE(review): what `init` sets up is not visible in this notebook; presumably
# it fetches the Mask R-CNN sources imported further down - confirm.
!nhdcolab init mp-mask-rcnn

In [None]:
from nhdcolab.environment import NHDEnvironment
# Create the environment helper with Google Drive mounting requested.
# NOTE(review): presumably this triggers the Colab Drive authorization prompt
# and changes the working directory used by the next cell - confirm.
NHD_ENV = NHDEnvironment(gdrive_mount=True)

Move to Mask_RCNN directory on the drive

In [None]:
cd mask-rcnn/matterport/

Import the required libraries

In [None]:
from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import visualize
import mrcnn
from mrcnn.utils import Dataset
from mrcnn.model import MaskRCNN
import numpy as np
from numpy import zeros
from numpy import asarray
import colorsys
import argparse
import imutils
import random
import cv2
import os
import time
from matplotlib import pyplot
from matplotlib.patches import Rectangle
from  tensorflow.keras.models import load_model
%matplotlib inline
from os import listdir
from xml.etree import ElementTree

We create a myMaskRCNNConfig class for training on the buttons dataset. It is derived from the base Mask R-CNN Config class and overrides some values.

In [None]:
class myMaskRCNNConfig(Config):
    """Mask R-CNN settings used for the buttons dataset.

    Only the attributes listed here are overridden; everything else keeps the
    value inherited from the base ``Config`` class.
    """

    # Name under which this configuration is identified.
    NAME = "MaskRCNN_config"

    # Hardware layout: a single GPU processing one image at a time.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Background plus the two button classes ("button", "button_off").
    NUM_CLASSES = 2 + 1

    # Training steps that make up one epoch.
    STEPS_PER_EPOCH = 131

    # Base learning rate.
    LEARNING_RATE = 0.006

    # Detections scoring below 90% confidence are dropped.
    DETECTION_MIN_CONFIDENCE = 0.9

    # Upper bound on ground-truth instances per image.
    MAX_GT_INSTANCES = 10

Create an instance of the myMaskRCNNConfig class

In [None]:
# Instantiate the configuration defined above and call its display() method
# so the effective settings can be checked before training.
config = myMaskRCNNConfig()
config.display()

Build the custom buttons data set.

In [None]:
class ButtonsDataset(Dataset):
    """Buttons dataset backed by image files and XML box annotations.

    Expected layout under ``dataset_dir``::

        images/<image_id>.<ext>
        annotations/<image_id>.xml

    Each annotation file holds ``<object>`` elements (with ``<name>`` and
    ``<bndbox>`` children) and a ``<size>`` element. Two foreground classes
    are registered: "button" (id 1) and "button_off" (id 2). Instance masks
    are the filled bounding boxes.
    """

    def load_dataset(self, dataset_dir, is_train=True):
        """Register the classes and every image found under ``dataset_dir``.

        Args:
            dataset_dir: Directory containing the ``images`` and
                ``annotations`` sub-directories.
            is_train: Accepted for call compatibility; it is not used. The
                train/validation split is made by passing different
                directories.
        """
        # Two foreground classes.
        self.add_class("Buttons", 1, "button")
        self.add_class("Buttons", 2, "button_off")

        images_dir = os.path.join(dataset_dir, 'images')
        annotations_dir = os.path.join(dataset_dir, 'annotations')

        # listdir() order is arbitrary; sorting makes image ids reproducible.
        for filename in sorted(listdir(images_dir)):
            img_path = os.path.join(images_dir, filename)

            # Skip hidden entries and sub-directories (e.g. .ipynb_checkpoints)
            # that would otherwise be registered as images.
            if filename.startswith('.') or not os.path.isfile(img_path):
                continue

            # splitext copes with extensions of any length (.png, .jpeg, ...).
            image_id = os.path.splitext(filename)[0]
            ann_path = os.path.join(annotations_dir, image_id + '.xml')

            self.add_image('Buttons', image_id=image_id, path=img_path, annotation=ann_path)

    @staticmethod
    def _read_int(element, path):
        """Return the text of ``element.find(path)`` as an int.

        Goes through float() first so values written as "12.0" are accepted.
        """
        return int(float(element.find(path).text))

    def extract_boxes(self, filename):
        """Parse one annotation XML file.

        Args:
            filename: Path of the XML annotation file.

        Returns:
            A tuple ``(boxes, width, height, class_names)``. ``boxes`` is a
            list of ``[xmin, ymin, xmax, ymax]`` integer lists, ``width`` and
            ``height`` come from the ``<size>`` element, and
            ``class_names[i]`` is the ``<name>`` of the object that owns
            ``boxes[i]``. Objects without a ``<bndbox>`` child are skipped so
            the two lists always stay aligned.
        """
        root = ElementTree.parse(filename).getroot()

        boxes = []
        class_names = []
        # Read name and box from the same <object> element; scanning them
        # independently can pair a box with the wrong class.
        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            if bndbox is None:
                continue
            boxes.append([self._read_int(bndbox, tag)
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
            class_names.append(obj.find('name').text)

        width = self._read_int(root, './/size/width')
        height = self._read_int(root, './/size/height')
        return boxes, width, height, class_names

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Each annotated bounding box becomes one rectangular mask.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            masks: A uint8 array of shape [height, width, instance count] with
                one mask per instance (1 inside the box, 0 elsewhere).
            class_ids: A 1D int32 array of class IDs of the instance masks.

        Raises:
            ValueError: If an annotation names a class that is not present in
                ``self.class_names``.
        """
        info = self.image_info[image_id]
        boxes, w, h, class_names = self.extract_boxes(info['annotation'])

        # One channel per instance.
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        class_ids = []
        for i, ((xmin, ymin, xmax, ymax), class_name) in enumerate(zip(boxes, class_names)):
            masks[ymin:ymax, xmin:xmax, i] = 1
            # NOTE(review): self.class_names is not set in this class;
            # presumably the base Dataset fills it in prepare(), so prepare()
            # must have been called before masks are loaded - confirm.
            try:
                class_ids.append(self.class_names.index(class_name))
            except ValueError:
                raise ValueError(
                    "Unknown class %r in annotation %s" % (class_name, info['annotation'])
                ) from None
        return masks, asarray(class_ids, dtype='int32')

    def image_reference(self, image_id):
        """Return the file path of the image with the given id."""
        return self.image_info[image_id]['path']

Prepare the train and test sets


In [None]:
# Training set: register the images under Buttons/train, call prepare() on the
# dataset, then report how many images were registered. The path is relative
# to the current working directory.
train_set =ButtonsDataset()
train_set.load_dataset('..//..//..//drive//My Drive//Buttons//train', is_train=True)
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
# Test/validation set: same steps for Buttons/verification. This set is passed
# to model.train() as the validation dataset and supplies the class names used
# when displaying predictions.
test_set = ButtonsDataset()
test_set.load_dataset('..//..//..//drive//My Drive//Buttons//verification', is_train=False)
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))

Initialize Mask R-CNN model for “training” using the Config instance that we created

In [None]:
print("Loading Mask R-CNN model...")
# Build the network in training mode using the configuration created earlier.
# NOTE(review): model_dir='./' presumably makes logs and checkpoints land in
# the current working directory - confirm.
model = modellib.MaskRCNN(mode="training", config=config, model_dir='./')

Load the pre-trained weights for the Mask R-CNN from COCO data set excluding the last few layers

In [None]:
# Load the COCO pre-trained weights, matching layers by name.
# The four layers listed in `exclude` are skipped.
# NOTE(review): presumably these are the output layers whose shapes depend on
# the class count, which differs from COCO here - confirm.

model.load_weights('..//..//..//drive//My Drive//models//mask-rcnn//mask_rcnn_coco.h5', 
                   by_name=True, 
                   exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"])

Train the heads with a higher learning rate to speed up learning

In [None]:
# Train only the 'heads' layers for 5 epochs at twice the configured learning
# rate (2 * 0.006 = 0.012), validating against test_set.
model.train(train_set, test_set, learning_rate=2*config.LEARNING_RATE, epochs=5, layers='heads')
# Keep the history recorded on the underlying Keras model for later inspection.
history = model.keras_model.history.history

Save the trained weights for the custom data set

In [None]:
# NOTE(review): `time` is already imported in the imports cell and is not used
# in this cell.
import time
# Destination for the fine-tuned weights; the inference cell reloads from this
# same variable.
# NOTE(review): the file name says "Lr0.06", but training above ran with
# 2 * 0.006 = 0.012 - confirm which value the name should carry.
model_path = '..//..//..//drive//My Drive//models//maskrcnn_Buttons_TF2.2Epoch5Lr0.06.h5'
model.keras_model.save_weights(model_path)

Detect objects in a sample image and display the masks and bounding boxes predicted by the trained model

In [None]:
# Import the image helpers from tensorflow.keras, as the rest of this notebook
# does, instead of the standalone keras package.
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array

# Rebuild the network in inference mode. This rebinds `model`, so the
# training-mode model is no longer reachable under that name.
model = modellib.MaskRCNN(mode="inference", config=config, model_dir='./')
# Load the weights saved after training on the custom dataset.
model.load_weights(model_path, by_name=True)

# Read one sample image and convert it to an array.
img = load_img("..//..//..//drive//My Drive//Buttons//sample-images//EnableDolby__HP__2YConfDAHT__1024x768.png")
img = img_to_array(img)

# detect() is given a one-element list; the entry for that image is read back
# from position 0 of the result.
results = model.detect([img], verbose=1)

# Display results
r = results[0]
print(test_set.class_names)
visualize.display_instances(img, r['rois'], r['masks'], r['class_ids'],
                            test_set.class_names, r['scores'],
                            title="Predictions")