Source: A simple guide to Mask R-CNN implementation on a custom dataset (https://medium.com/analytics-vidhya/a-simple-guide-to-maskrcnn-custom-dataset-implementation-27f7eab381f2)

# Step 1: Clone the repository.

In [None]:
# Remove any previous checkout entirely. Deleting only "Custom_MaskRCNN/*" leaves hidden
# entries such as .git behind, which makes the following `git clone` refuse to run;
# -f also keeps this cell quiet on a fresh runtime where the directory does not exist yet.
!rm -rf /content/Custom_MaskRCNN

In [None]:
# Clone to an explicit absolute path so the /content/Custom_MaskRCNN/... paths used by the
# later cells are valid regardless of the notebook's current working directory.
!git clone https://github.com/soumyaiitkgp/Custom_MaskRCNN.git /content/Custom_MaskRCNN
# Install the repository's Python requirements.
!pip install -r /content/Custom_MaskRCNN/requirements.txt

# Step 2: Prepare the data.

In [None]:
import json
import os
import numpy as np

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python values.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps`` when the data
    contains NumPy objects that the standard encoder rejects.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        Args:
            obj: The object the standard encoder could not serialize.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
            for NumPy floats, and a (nested) ``list`` for NumPy arrays.

        Raises:
            TypeError: Raised by the base class for any other unsupported type.
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Defer to the base implementation so unsupported types still raise TypeError.
        return super().default(obj)

# Maps an annotation's "classTitle" to its numeric category id; convert_annotation
# looks ids up here, and the values match the ids in its "categories" list.
categ = {"Wall Clock": 0, "Wrist watch":1}

def convert_annotation(set_type, ann_dir=None, output_path=None): # set_type should be 'train' or 'val'
    """Merge per-image polygon annotation files into one COCO-style JSON file.

    Every ``*.json`` file in the annotation folder is read. Each file must provide
    ``size.height``, ``size.width`` and an ``objects`` list whose items carry a
    ``classTitle`` and ``points.exterior`` (a list of ``[x, y]`` vertices). One
    image entry is written per file and one annotation per object.

    Args:
        set_type: Split name, 'train' or 'val'; used to build the default paths.
        ann_dir: Folder holding the per-image annotation files. Defaults to
            ``/content/{set_type}_ann``.
        output_path: File to write. Defaults to ``{set_type}_ann.json`` in the
            current working directory.

    Raises:
        KeyError: If a file lacks a required field or an object's ``classTitle``
            is not a key of the module-level ``categ`` mapping.
    """
    if ann_dir is None:
        ann_dir = f'/content/{set_type}_ann'
    if output_path is None:
        output_path = f'{set_type}_ann.json'

    coco_info = {}
    coco_info["info"] = {
            "year": "2021",
            "version": "2",
            "description": "Exported from roboflow.ai",
            "contributor": "",
            "url": "https://public.roboflow.ai/object-detection/undefined",
            "date_created": "2021-12-12T10:32:27+00:00"
        }
    coco_info["licenses"] = [
            {
                "id": 1,
                "url": "https://creativecommons.org/licenses/by/4.0/",
                "name": "CC BY 4.0"
            }
        ]
    coco_info["categories"] = [
            {
                "id": 0,
                "name": "Wall Clock",
                "supercategory": "Watch"
            },
            {
                "id": 1,
                "name": "Wrist watch",
                "supercategory": "Watch"
            }
        ]
    coco_info["images"] = []
    coco_info["annotations"] = []

    image_id = 0
    annotation_id = 0
    # os.listdir returns entries in arbitrary order; sorting keeps ids reproducible.
    for seg_ann in sorted(os.listdir(ann_dir)):
        ann_path = os.path.join(ann_dir, seg_ann)
        # Skip anything that is not an annotation file (e.g. Colab's
        # .ipynb_checkpoints directory), which would otherwise crash open()/json.load().
        if not seg_ann.lower().endswith(".json") or not os.path.isfile(ann_path):
            continue
        with open(ann_path, "r") as f:
            info = json.load(f)

        h, w = info["size"]["height"], info["size"]["width"]
        # The image name is the annotation file name without its ".json" suffix.
        file_name = os.path.splitext(seg_ann)[0]
        # "date_captured" is the key name used by the COCO image schema.
        coco_info["images"].append({"id": image_id, "license":1, "file_name":file_name, "height":h, "width":w, "date_captured":"2021-12-12T10:32:27+00:00"})

        # Emit one annotation per object instead of only the first one; a file
        # with no objects simply contributes an image without annotations.
        for obj in info.get("objects", []):
            points = obj["points"]["exterior"]
            if not points:
                # Nothing to compute a bounding box from.
                continue
            xy = np.array(points).transpose()  # row 0: x coordinates, row 1: y coordinates
            x_min, y_min, x_max, y_max = np.min(xy[0]), np.min(xy[1]), np.max(xy[0]), np.max(xy[1])
            bbox = [x_min, y_min, x_max-x_min, y_max-y_min]  # [x, y, width, height]
            area = bbox[2]*bbox[3]  # bounding-box area, not the polygon's own area
            # NOTE(review): "segmentation" keeps the nested [[x, y], ...] vertex list as
            # before; the loader that consumes this file is not visible here, so the
            # layout is left unchanged - confirm it is what that loader expects.
            coco_info["annotations"].append({"id":annotation_id, "image_id":image_id, "category_id": categ[obj["classTitle"]], "bbox":bbox, "area": area, "segmentation": points, "iscrowd":0})
            annotation_id += 1

        image_id += 1

    with open(output_path, "w") as f:
        # NpEncoder converts the NumPy scalars in bbox/area to plain JSON numbers.
        json.dump(coco_info, f, cls=NpEncoder)

In [None]:
# Build the COCO-style annotation file for each dataset split, training split first.
for split_name in ('train', 'val'):
    convert_annotation(split_name)

In [None]:
# Copy the training images into the repository's dataset/train folder.
%cp -a /content/train_img/. /content/Custom_MaskRCNN/samples/custom/dataset/train/
# Rename the converted training annotations to via_region_data.json. This expects
# train_ann.json in /content, i.e. convert_annotation was run with /content as the
# working directory (it writes its output relative to the working directory).
%mv /content/train_ann.json /content/via_region_data.json
# Place a copy of the renamed annotation file next to the training images.
%cp /content/via_region_data.json /content/Custom_MaskRCNN/samples/custom/dataset/train/

In [None]:
# Copy the validation images into the repository's dataset/val folder.
%cp -a /content/val_img/. /content/Custom_MaskRCNN/samples/custom/dataset/val/
# Rename the converted validation annotations to via_region_data.json; this overwrites
# any /content/via_region_data.json already present at that path.
%mv /content/val_ann.json /content/via_region_data.json
# Place a copy of the renamed annotation file next to the validation images.
%cp /content/via_region_data.json /content/Custom_MaskRCNN/samples/custom/dataset/val/

# Step 3: Prepare the model.

In [None]:
# Before training, edit /content/Custom_MaskRCNN/samples/custom/custom.py according to the instructions in the source article.

# Step 4: Train the model.

In [None]:
# Pin the package to the version this notebook was run with (mrcnn-0.2 in the recorded
# output) so a later release cannot silently change behaviour.
!pip install mrcnn==0.2

Collecting mrcnn
  Downloading mrcnn-0.2.tar.gz (51 kB)
[?25l[K     |██████▍                         | 10 kB 19.7 MB/s eta 0:00:01[K     |████████████▊                   | 20 kB 24.9 MB/s eta 0:00:01[K     |███████████████████             | 30 kB 12.1 MB/s eta 0:00:01[K     |█████████████████████████▌      | 40 kB 9.7 MB/s eta 0:00:01[K     |███████████████████████████████▉| 51 kB 5.1 MB/s eta 0:00:01[K     |████████████████████████████████| 51 kB 196 kB/s 
[?25hBuilding wheels for collected packages: mrcnn
  Building wheel for mrcnn (setup.py) ... [?25l[?25hdone
  Created wheel for mrcnn: filename=mrcnn-0.2-py3-none-any.whl size=54930 sha256=46031118a955ca5321aa88e7e6ba1cf192376229b2746e522e40f3fbf5c6fa9f
  Stored in directory: /root/.cache/pip/wheels/1d/94/0d/03ff96abc43d2d6c8299a92cbb4eced2a1eda3ca7911c19427
Successfully built mrcnn
Installing collected packages: mrcnn
Successfully installed mrcnn-0.2


In [None]:
# Remove the preinstalled Keras / TensorFlow / h5py packages in a single pip call, in the
# same order as before, so the pinned versions can be installed afterwards.
!pip uninstall -y keras keras-nightly keras-Preprocessing keras-vis tensorflow h5py

Found existing installation: keras 2.7.0
Uninstalling keras-2.7.0:
  Successfully uninstalled keras-2.7.0
Found existing installation: Keras-Preprocessing 1.1.2
Uninstalling Keras-Preprocessing-1.1.2:
  Successfully uninstalled Keras-Preprocessing-1.1.2
Found existing installation: keras-vis 0.4.1
Uninstalling keras-vis-0.4.1:
  Successfully uninstalled keras-vis-0.4.1
Found existing installation: tensorflow 2.7.0
Uninstalling tensorflow-2.7.0:
  Successfully uninstalled tensorflow-2.7.0
Found existing installation: h5py 3.1.0
Uninstalling h5py-3.1.0:
  Successfully uninstalled h5py-3.1.0


In [None]:
# Install all three pins in one pip invocation so they are resolved together. When
# tensorflow was installed on its own first, pip pulled in the newest h5py (3.6.0 in the
# recorded output), which the separate h5py==2.10.0 step then had to replace.
!pip install tensorflow==1.13.1 keras==2.1.6 h5py==2.10.0

Collecting tensorflow==1.13.1
  Downloading tensorflow-1.13.1-cp37-cp37m-manylinux1_x86_64.whl (92.6 MB)
[K     |████████████████████████████████| 92.6 MB 1.3 MB/s 
Collecting keras-preprocessing>=1.0.5
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 1.1 MB/s 
Collecting keras-applications>=1.0.6
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 5.6 MB/s 
[?25hCollecting tensorboard<1.14.0,>=1.13.0
  Downloading tensorboard-1.13.1-py3-none-any.whl (3.2 MB)
[K     |████████████████████████████████| 3.2 MB 16.2 MB/s 
Collecting tensorflow-estimator<1.14.0rc0,>=1.13.0
  Downloading tensorflow_estimator-1.13.0-py2.py3-none-any.whl (367 kB)
[K     |████████████████████████████████| 367 kB 48.7 MB/s 
Collecting h5py
  Downloading h5py-3.6.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 1

In [None]:
# Run the repository's training script in "train" mode on the prepared dataset folder,
# passing "coco" as the starting-weights option (how that value is resolved is defined
# inside custom.py, which is not shown here).
!python /content/Custom_MaskRCNN/samples/custom/custom.py train --dataset=/content/Custom_MaskRCNN/samples/custom/dataset --weights=coco

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.
Weights:  coco
Dataset:  /content/Custom_MaskRCNN/samples/custom/dataset
Logs:  /logs

Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.9
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                15
IMAGE_MIN_DIM                  800
IMAGE_

# Step 1: Clone the Mask R-CNN repository


Source: Object detection using Mask R-CNN on a custom dataset (https://towardsdatascience.com/object-detection-using-mask-r-cnn-on-a-custom-dataset-4f79ab692f6d)

In [None]:
import os
# Switch the notebook's working directory to the Mask_RCNN checkout; the directory must
# already exist for this to succeed.
%cd /content/Mask_RCNN/
# Echo the working directory as the cell output to confirm the change took effect.
os.getcwd()

/content/Mask_RCNN


'/content/Mask_RCNN'

In [None]:
# Clone the Matterport Mask R-CNN repository only when it is missing: a fresh runtime
# gets the checkout it needs, and re-running the cell does not fail on an existing directory.
![ -d /content/Mask_RCNN ] || git clone https://github.com/matterport/Mask_RCNN.git /content/Mask_RCNN
# setup.py is invoked with a relative path, so the checkout must be the working directory.
%cd /content/Mask_RCNN/
!python setup.py install

In [None]:
# Copy the mask_rcnn_coco.h5 weights file (presumably the COCO-pretrained weights - confirm)
# into the Mask_RCNN checkout. The source path is relative: with the working directory at
# /content/Mask_RCNN, as set by the earlier %cd, ../../ resolves to the filesystem root.
%cp ../../mask_rcnn_coco.h5 /content/Mask_RCNN/

In [None]:
from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import visualize
import mrcnn
from mrcnn.utils import Dataset
from mrcnn.model import MaskRCNN
import numpy as np
from numpy import zeros
from numpy import asarray
import colorsys
import argparse
import imutils
import random
import cv2
import os
import time
from matplotlib import pyplot
from matplotlib.patches import Rectangle
from keras.models import load_model
%matplotlib inline
from os import listdir
from xml.etree import ElementTree

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [None]:
class myMaskRCNNConfig(Config):
    """Mask R-CNN settings for this notebook, overriding selected ``Config`` attributes."""

    # Recognizable name for this configuration.
    NAME = "MaskRCNN_config"

    # Hardware layout: one GPU processing one image at a time.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Total number of classes, background included.
    # NOTE(review): presumably 1 background + 2 object classes; the previous comment
    # here read "kangaroo + BG", which does not add up to 3 - confirm the class list.
    NUM_CLASSES = 1 + 2

    # Training schedule: steps per epoch and learning rate.
    STEPS_PER_EPOCH = 131
    LEARNING_RATE = 0.006

    # Detections scoring below 90% confidence are skipped.
    DETECTION_MIN_CONFIDENCE = 0.9

    # Maximum number of ground-truth instances.
    MAX_GT_INSTANCES = 10

In [None]:
# Instantiate the notebook's Mask R-CNN configuration class.
config = myMaskRCNNConfig()

In [None]:
# Print the configuration values so they can be reviewed before use.
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.9
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                15
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.006
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE         