# Install Detectron2

In [0]:
# install dependencies: (use cu100 because colab is on CUDA 10.0)
!pip install -U torch==1.4+cu100 torchvision==0.5+cu100 -f https://download.pytorch.org/whl/torch_stable.html 
!pip install cython pyyaml==5.1
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
import torch, torchvision
torch.__version__
!gcc --version
# opencv is pre-installed on colab

In [0]:
# Install detectron2 from the prebuilt wheel index for cu100, the same CUDA tag
# used for the torch/torchvision wheels installed in the previous cell.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu100/index.html

# Setup environment

In [0]:
# You may need to restart your runtime prior to this, to let your installation take effect
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# Prepare custom dataset

To prepare a custom dataset, you can use [coco-annotator](https://github.com/jsbroks/coco-annotator).  
Docker-based setup instructions can be found [here](https://github.com/jsbroks/coco-annotator/wiki/Getting-Started#installing-with-docker).  
If you prefer a desktop app, you can use [labelme](https://github.com/wkentaro/labelme).

Import the dataset from Google Drive

In [0]:
from google.colab import drive

# Mount Google Drive at /content/gdrive; the dataset paths used by later cells
# ('/content/gdrive/My Drive/D2CustomDataset/...') live under this mount point.
drive.mount('/content/gdrive')

Check if dataset is available

In [0]:
# List the dataset root; later cells read the 'train', 'val' and 'predict'
# sub-folders from this directory.
!ls '/content/gdrive/My Drive/D2CustomDataset'

Register the coin dataset to detectron2, following the [detectron2 custom dataset tutorial](https://detectron2.readthedocs.io/tutorials/datasets.html).
Here, the dataset is in its custom format, therefore we write a function to parse it and prepare it into detectron2's standard format.  
Annotations were generated with [labelme](https://github.com/wkentaro/labelme), so convert them to detectron2 format.


In [0]:
import os
import numpy as np
import json
from detectron2.structures import BoxMode

def label_to_category_id(label):
    """Translate a labelme text label into the numeric category id detectron2 wants.

    Returns the integer id of a known label, or -1 when the label is not
    one of the known classes.
    """
    category_ids = {'coin': 0}
    return category_ids.get(label, -1)
	
	
def get_coin_dicts(img_dir):
    """Parse the labelme JSON files in ``img_dir`` into detectron2 dataset dicts.

    Every ``*.json`` file in the directory is read as one labelme annotation
    file describing one image. Only shapes whose ``shape_type`` is
    ``'polygon'`` are converted; shapes of any other type are skipped.

    Args:
        img_dir: Directory holding the labelme ``.json`` files. Each file's
            ``imagePath`` entry is joined onto this directory.

    Returns:
        A list with one record per annotation file. Each record has the keys
        ``file_name``, ``image_id``, ``height``, ``width`` and ``annotations``.
        Each annotation has ``bbox`` (``[x_min, y_min, x_max, y_max]``),
        ``bbox_mode``, ``segmentation``, ``category_id`` and ``iscrowd``.

    Raises:
        AssertionError: If a shape has an empty ``points`` list.
    """
    dataset_dicts = []
    # Sort the listing so image ids are assigned in the same order on every
    # run; os.listdir() makes no promise about ordering.
    json_names = sorted(name for name in os.listdir(img_dir) if name.endswith('.json'))

    for idx, json_name in enumerate(json_names):
        with open(os.path.join(img_dir, json_name)) as f:
            img_ann = json.load(f)

        objs = []
        for anno in img_ann['shapes']:
            assert anno['points']
            if anno['shape_type'] != 'polygon':
                continue

            xs = [x for x, _ in anno['points']]
            ys = [y for _, y in anno['points']]

            # Flattened [x0, y0, x1, y1, ...] outline, shifted by half a pixel.
            poly = []
            for x, y in anno['points']:
                poly.append(x + 0.5)
                poly.append(y + 0.5)

            # Exactly one annotation per polygon, built after all of its points
            # have been seen. Emitting it inside the per-point loop would
            # produce one duplicate annotation per vertex, all sharing the
            # same bbox/poly list objects.
            objs.append({
                'bbox': [min(xs), min(ys), max(xs), max(ys)],
                'bbox_mode': BoxMode.XYXY_ABS,
                'segmentation': [poly],
                'category_id': label_to_category_id(anno['label']),
                'iscrowd': 0
            })

        dataset_dicts.append({
            'file_name': os.path.join(img_dir, img_ann['imagePath']),
            'image_id': idx,
            'height': img_ann['imageHeight'],
            'width': img_ann['imageWidth'],
            'annotations': objs,
        })

    return dataset_dicts

from detectron2.data import DatasetCatalog, MetadataCatalog

# Dataset root on the mounted Drive; the 'train' and 'val' splits are read from
# sub-folders of this directory.
gdrive_dir = '/content/gdrive/My Drive/D2CustomDataset/'
for d in ['train', 'val']:
    dataset_name = "coin_" + d
    # Registering a name twice is an error, so only register names the catalog
    # does not know yet. This keeps the cell safe to re-run in the same kernel.
    if dataset_name not in DatasetCatalog.list():
        # `d=d` freezes the current split name in the lambda; without it every
        # registered loader would see the last value of the loop variable.
        DatasetCatalog.register(dataset_name, lambda d=d: get_coin_dicts(os.path.join(gdrive_dir, d)))
    MetadataCatalog.get(dataset_name).set(thing_classes=["coin"])
coin_metadata = MetadataCatalog.get("coin_train")

To verify the data loading is correct, let's visualize the annotations of randomly selected samples in the training set:



In [0]:
dataset_dicts = get_coin_dicts(gdrive_dir + 'train')
# random.sample() raises ValueError when asked for more items than exist, so
# cap the sample size for datasets with fewer than three images.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    img = cv2.imread(d['file_name'])
    if img is None:
        # cv2.imread() returns None instead of raising when a file is missing
        # or unreadable; report it and move on rather than failing on the slice.
        print('Could not read image:', d['file_name'])
        continue
    # OpenCV loads BGR; the channel axis is reversed before drawing and
    # reversed again for cv2_imshow.
    visualizer = Visualizer(img[:, :, ::-1], metadata=coin_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    cv2_imshow(vis.get_image()[:, :, ::-1])

# Train model

Now, let's fine-tune a COCO-pretrained R50-FPN Mask R-CNN model on the custom dataset.  
Training on Colab's P100 GPU is usually 3 times faster than on K80 GPU.


In [0]:
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

# Start from the model-zoo Mask R-CNN R50-FPN config and override what differs.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'))

# Data: train on the registered coin split, no evaluation set during training.
cfg.DATASETS.TRAIN = ('coin_train',)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2

# Model: initialise from the model-zoo checkpoint; a single class (coin).
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   # tune empirically

# Solver: LR and iteration count were picked empirically for this dataset.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 500

# Training artefacts are written to the dataset root on Drive.
cfg.OUTPUT_DIR = gdrive_dir
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

In [0]:
# Look at training curves in tensorboard:
%load_ext tensorboard
%tensorboard --logdir output

# Test model

Now, let's run inference with the trained model on the coin validation dataset. First, we create a predictor using the model we just trained.
Then, we randomly select several samples and visualize the prediction results.

In [0]:
# Point the training config at the weights saved in the Drive folder and build
# a predictor from it.
cfg.MODEL.WEIGHTS = os.path.join(gdrive_dir, 'model_final.pth')
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set the testing threshold for this model
cfg.DATASETS.TEST = ('coin_val', )
predictor = DefaultPredictor(cfg)

from detectron2.utils.visualizer import ColorMode
dataset_dicts = get_coin_dicts(gdrive_dir + 'val')
# random.sample() raises ValueError when asked for more items than exist, so
# cap the sample size for validation sets with fewer than three images.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    im = cv2.imread(d['file_name'])
    if im is None:
        # cv2.imread() returns None instead of raising on an unreadable file.
        print('Could not read image:', d['file_name'])
        continue
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                   metadata=coin_metadata,
                   scale=0.8,
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
    )
    v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
    cv2_imshow(v.get_image()[:, :, ::-1])

# Validate model

Load the fine-tuned model weights

In [0]:
# Fresh config for inference: same base architecture, weights taken from the
# model_final.pth stored in the Drive folder.
config = get_cfg()
config.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'))

# Model settings: single class (coin), keep detections scoring at least 0.7.
config.MODEL.ROI_HEADS.NUM_CLASSES = 1
config.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
config.MODEL.WEIGHTS = os.path.join(gdrive_dir, 'model_final.pth')

# Loader / solver settings mirror the values used for training.
config.DATALOADER.NUM_WORKERS = 2
config.SOLVER.IMS_PER_BATCH = 2

Check predictions on unannotated data

In [0]:
predictor_new = DefaultPredictor(config)

img_dir = os.path.join(gdrive_dir, 'predict')
# Sorted so the images are shown in a stable order across runs.
for file in sorted(os.listdir(img_dir)):
    # Match the extension case-insensitively so files such as 'IMG_01.JPG' are
    # not silently skipped; also accept the other common image extensions.
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    im = cv2.imread(os.path.join(img_dir, file))
    if im is None:
        # cv2.imread() returns None instead of raising on an unreadable file.
        print('Could not read image:', file)
        continue
    output = predictor_new(im)
    # OpenCV loads BGR; reverse the channel axis for drawing and again for display.
    v = Visualizer(im[:, :, ::-1], coin_metadata, scale=1.2)
    v = v.draw_instance_predictions(output["instances"].to("cpu"))
    cv2_imshow(v.get_image()[:, :, ::-1])
    