# Semantic Segmentation

## Import Libraries

In [1]:
import distutils.core
import os
import sys

# Run the setup script of the local Detectron2 checkout; the resulting
# distribution object is kept in `dist`.
dist = distutils.core.run_setup("./detectron2/setup.py")

# Put the checkout first on the import path, ahead of any other location,
# before the `detectron2` imports in the next cell.
sys.path.insert(0, os.path.abspath('./detectron2'))

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [2]:
from detectron2.structures import BoxMode, PolygonMasks
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
import cv2
import sys
import os
import numpy as np
import json
import random
import pickle
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")
%matplotlib inline

## Defined Functions

In [3]:
def get_data_dicts(path, classes):
    """Build Detectron2-style dataset records from the annotation files in ``path``.

    Every ``*.json`` file in ``path`` is read. Its ``"imagePath"`` entry names
    the annotated image (resolved relative to ``path``) and its ``"shapes"``
    entry lists the objects. Each shape is treated as an axis-aligned
    rectangle described by its first two points (two opposite corners).

    Args:
        path: Directory holding the annotation files and the images they reference.
        classes: Ordered list of label names; a shape's ``category_id`` is the
            index of its ``"label"`` in this list.

    Returns:
        A list with one dict per annotation file, with the keys ``file_name``,
        ``image_id``, ``width``, ``height`` and ``annotations``.

    Raises:
        FileNotFoundError: If a referenced image cannot be read.
        ValueError: If a shape's label is not in ``classes``.
    """
    dataset_dicts = []
    # Sorted so that image_id does not depend on the order in which the OS
    # happens to list the directory.
    json_filenames = sorted(name for name in os.listdir(path) if name.endswith(".json"))

    for idx, json_filename in enumerate(json_filenames):
        with open(os.path.join(path, json_filename)) as fptr:
            img_annotations = json.load(fptr)

        image_filename = os.path.join(path, img_annotations["imagePath"])
        image = cv2.imread(image_filename)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                "Could not read image '%s' referenced by '%s'" % (image_filename, json_filename)
            )
        height, width = image.shape[:2]

        objs = []
        for ann in img_annotations["shapes"]:
            # Only the first two points are used: two opposite rectangle corners.
            first, second = ann["points"][0], ann["points"][1]
            x_min, x_max = min(first[0], second[0]), max(first[0], second[0])
            y_min, y_max = min(first[1], second[1]), max(first[1], second[1])

            # Walk the corners around the perimeter. Listing them as
            # (x0,y0),(x1,y0),(x0,y1),(x1,y1) yields a self-intersecting
            # "bowtie" polygon instead of the rectangle.
            poly = [x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max]

            objs.append({
                "bbox": [x_min, y_min, x_max, y_max],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": classes.index(ann['label']),
                "iscrowd": 0,
            })

        dataset_dicts.append({
            "file_name": image_filename,
            "image_id": idx,
            "width": width,
            "height": height,
            "annotations": objs,
        })
    return dataset_dicts

In [4]:
def set_up_model(max_iter=500, save=None, num_classes=2, device='cpu'):
    """Create the training configuration for the model-zoo Mask R-CNN R50-FPN model.

    Args:
        max_iter: Value assigned to ``SOLVER.MAX_ITER``.
        save: Optional file path. When truthy, the finished config is pickled
            to this path so that later steps can reload it.
        num_classes: Value assigned to ``MODEL.ROI_HEADS.NUM_CLASSES``
            (defaults to the two classes used in this notebook).
        device: Value assigned to ``MODEL.DEVICE`` (defaults to ``'cpu'``).

    Returns:
        The populated config object.
    """
    # Named once: the same zoo entry supplies both the config and the weights.
    zoo_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_config))
    # Assigned after the merge so the explicit argument always wins.
    cfg.MODEL.DEVICE = device
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes

    cfg.DATASETS.TRAIN = ("category_train",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2

    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = max_iter

    if save:
        with open(save, 'wb') as fptr:
            pickle.dump(cfg, fptr, protocol=pickle.HIGHEST_PROTOCOL)

    return cfg

In [5]:
def train_model(cfg):
    """Train a model described by ``cfg`` with Detectron2's ``DefaultTrainer``.

    The output directory named by ``cfg.OUTPUT_DIR`` is created if needed.
    The trainer is started with ``resume=False``.

    Args:
        cfg: Training configuration, e.g. the one returned by ``set_up_model``.
    """
    output_dir = cfg.OUTPUT_DIR
    os.makedirs(output_dir, exist_ok=True)

    runner = DefaultTrainer(cfg)
    runner.resume_or_load(resume=False)
    runner.train()

In [6]:
def semantic_segmentation(image, labels, masks):
    """Paint per-instance masks onto a flat background, coloured by class.

    Args:
        image: Array whose shape (height, width, 3) determines the output shape;
            its pixel values are not used.
        labels: One class index per mask. A falsy label (class 0) is painted
            ``(255, 0, 0)``; any other label is painted ``(0, 0, 255)``.
        masks: One 2-D boolean-like mask per label. Later masks overwrite
            earlier ones where they overlap.

    Returns:
        A ``uint8`` array shaped like ``image``; pixels not covered by any mask
        hold the background colour ``(139, 10, 80)``.
    """
    # uint8 matters: matplotlib clips floating-point RGB data to [0, 1], so a
    # float canvas holding 0-255 values would be displayed saturated.
    canvas = np.zeros(image.shape, dtype=np.uint8)
    canvas[:, :] = (139, 10, 80)

    for label, mask in zip(labels, masks):
        color = (255, 0, 0) if not label else (0, 0, 255)

        # Tensor-like masks are brought to the host as NumPy arrays first.
        if hasattr(mask, "detach"):
            mask = mask.detach().cpu().numpy()
        selected = np.asarray(mask, dtype=bool)

        # One vectorised assignment replaces the per-pixel Python loops. The
        # slice keeps masks smaller than the canvas working as before.
        canvas[:selected.shape[0], :selected.shape[1]][selected] = color
    return canvas

In [7]:
def visualizer(image, outputs, metadata, filename=None, bounding_box=False):
    """Plot predictions for one image and optionally save the class map.

    Args:
        image: Image array as loaded by ``cv2.imread``.
        outputs: Predictor output; ``outputs["instances"]`` must provide
            ``pred_masks`` and ``pred_classes``.
        metadata: Dataset metadata handed to Detectron2's ``Visualizer``.
        filename: Optional file name. When given, the class map produced by
            ``semantic_segmentation`` is written to ``./Results/<filename>``.
        bounding_box: When true, plot the instance overlay drawn by
            Detectron2's ``Visualizer``; otherwise plot the class map.
    """
    instances = outputs["instances"]

    # The class map is only built when it will be displayed or saved.
    sem_seg_image = None
    if filename or not bounding_box:
        sem_seg_image = semantic_segmentation(
            image, instances.get("pred_classes"), instances.get("pred_masks")
        )

    plt.figure(figsize = (14, 10))

    if bounding_box:
        vis = Visualizer(image[:, :, ::-1], metadata=metadata, scale=0.8, instance_mode=ColorMode.SEGMENTATION)
        vis = vis.draw_instance_predictions(instances.to("cpu"))
        plt.imshow(cv2.cvtColor(vis.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB))
    else:
        plt.imshow(sem_seg_image)

    if filename:
        results_dir = "./Results/"
        # cv2.imwrite does not create directories and only reports failure
        # through its return value, so make sure the target exists.
        os.makedirs(results_dir, exist_ok=True)
        target = os.path.join(results_dir, filename)
        # NOTE(review): imshow reads this array as RGB while cv2.imwrite reads
        # it as BGR, so the saved file has red and blue swapped relative to
        # the plot - confirm which of the two is intended.
        if not cv2.imwrite(target, sem_seg_image):
            print("Could not write segmentation image to " + target)

    plt.show()

In [8]:
def make_inferences(cfg_path, path, metadata, thresh=0.2, bounding_box=True, classes=None):
    """Run the trained model on a random selection of test images and plot them.

    Args:
        cfg_path: Path of the pickled config written by ``set_up_model``.
        path: Dataset root directory; test data is read from its ``test``
            sub-directory.
        metadata: Dataset metadata used for drawing. Also supplies the class
            names (``metadata.thing_classes``) when ``classes`` is not given.
        thresh: Value assigned to ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``.
        bounding_box: Forwarded to ``visualizer``.
        classes: Optional ordered list of label names. Passing it explicitly,
            or relying on ``metadata``, avoids depending on a notebook global.

    Returns:
        The ``DefaultPredictor`` built from the config, for reuse by the caller.
    """
    # NOTE: pickle.load can execute arbitrary code; only load config files
    # that this notebook wrote itself.
    with open(cfg_path, 'rb') as fptr:
        cfg = pickle.load(fptr)

    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = thresh
    cfg.DATASETS.TEST = ()
    predictor = DefaultPredictor(cfg)

    if classes is None:
        classes = metadata.thing_classes

    # os.path.join works whether or not `path` ends with a separator.
    test_dataset_dicts = get_data_dicts(os.path.join(path, 'test'), classes)

    # All but one image are shown, in random order. The count is clamped so an
    # empty test set shows nothing instead of raising ValueError.
    sample_size = max(len(test_dataset_dicts) - 1, 0)
    for data in random.sample(test_dataset_dicts, sample_size):
        image = cv2.imread(data["file_name"])
        outputs = predictor(image)
        visualizer(image, outputs, metadata, bounding_box=bounding_box)

    return predictor

In [9]:
def evaluate(cfg_path, predictor):
    """Evaluate ``predictor`` on the ``"category_test"`` dataset and print the result.

    Args:
        cfg_path: Path of the pickled config; it is used to build the test loader.
        predictor: Object whose ``model`` attribute is run over the dataset.
    """
    with open(cfg_path, 'rb') as cfg_file:
        cfg = pickle.load(cfg_file)

    dataset_name = "category_test"
    coco_evaluator = COCOEvaluator(dataset_name, output_dir="./output")
    test_loader = build_detection_test_loader(cfg, dataset_name)

    results = inference_on_dataset(predictor.model, test_loader, coco_evaluator)
    print(results)

In [10]:
def prediction(cfg_path, image_path, metadata, filename=None, bounding_box=False, thresh=0.3):
    """Run the trained model on a single image and visualise the result.

    Args:
        cfg_path: Path of the pickled config written by ``set_up_model``.
        image_path: Path of the image to run the model on.
        metadata: Dataset metadata forwarded to ``visualizer``.
        filename: Optional output file name forwarded to ``visualizer``.
        bounding_box: Forwarded to ``visualizer``.
        thresh: Value assigned to ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
    """
    # Read the image first: cv2.imread returns None on failure rather than
    # raising, and checking here fails fast before any weights are loaded.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError("Could not read image '%s'" % (image_path,))

    # NOTE: pickle.load can execute arbitrary code; only load config files
    # that this notebook wrote itself.
    with open(cfg_path, 'rb') as fptr:
        cfg = pickle.load(fptr)

    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = thresh

    predictor = DefaultPredictor(cfg)
    outputs = predictor(image)
    visualizer(image, outputs, metadata, filename=filename, bounding_box=bounding_box)

## Creating Custom Datasets

In [11]:
# Label names; a label's position in this list is its category id.
classes = ["Pebbles", "Large Rock"]
# Dataset root containing the "train" and "test" sub-directories.
path = r"./Images/"
# File the training config is pickled to and later reloaded from.
cfg_path = r"cfg_model.pickle"

for d in ["train", "test"]:
    dataset_name = "category_" + d
    # Registering a name twice raises, so drop any earlier registration first.
    # This keeps the cell re-runnable in a live kernel.
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    # `d=d` binds the current split name; a plain closure would see only the last one.
    DatasetCatalog.register(dataset_name, lambda d=d: get_data_dicts(path + d, classes))
    MetadataCatalog.get(dataset_name).set(thing_classes=classes)

# Metadata of the training split, used for drawing in the cells below.
microcontroller_metadata = MetadataCatalog.get("category_train")

## Training

In [None]:
# Build the 500-iteration training config, pickle it to `cfg_path`, then train.
# The log recorded below shows roughly 25 s per iteration on CPU (ETA over three hours).
cfg = set_up_model(max_iter=500, save=cfg_path)
train_model(cfg)

[32m[10/27 23:30:19 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[10/27 23:30:21 d2.data.build]: [0mRemoved 0 images with no usable annotations. 30 images left.
[32m[10/27 23:30:21 d2.data.build]: [0mDistribution of instances among all 2 categories:
[36m|  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|
|  Pebbles   | 429          | Large Rock | 209          |
|            |              |            |              |
|   total    | 638          |            |              |[0m
[32m[10/27 23:30:21 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[10/27 23:30:21 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[10/27 23:30:21 d2.data.common]: [0mSerializing 30 elements to byte tensors and concatenating them all ...
[32m[10/27 23:30:21 d2.data.common]: [0mSerialized dataset takes 0.12 MiB


Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (3, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (3,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (8, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (8,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (2, 256, 1, 1) in

[32m[10/27 23:30:25 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[10/27 23:38:53 d2.utils.events]: [0m eta: 3:20:00  iter: 19  total_loss: 4.141  loss_cls: 1.069  loss_box_reg: 0.4686  loss_mask: 0.6924  loss_rpn_cls: 1.684  loss_rpn_loc: 0.2286  time: 25.2775  data_time: 0.0135  lr: 9.7405e-06  
[32m[10/27 23:47:26 d2.utils.events]: [0m eta: 3:14:44  iter: 39  total_loss: 3.018  loss_cls: 0.913  loss_box_reg: 0.542  loss_mask: 0.6917  loss_rpn_cls: 0.6777  loss_rpn_loc: 0.2112  time: 25.4841  data_time: 0.0057  lr: 1.9731e-05  
[32m[10/27 23:56:04 d2.utils.events]: [0m eta: 3:08:07  iter: 59  total_loss: 2.52  loss_cls: 0.7783  loss_box_reg: 0.55  loss_mask: 0.6903  loss_rpn_cls: 0.2858  loss_rpn_loc: 0.2093  time: 25.6240  data_time: 0.0095  lr: 2.972e-05  
[32m[10/28 00:04:55 d2.utils.events]: [0m eta: 3:01:23  iter: 79  total_loss: 2.297  loss_cls: 0.6555  loss_box_reg: 0.598  loss_mask: 0.688  loss_rpn_cls: 0.1446  loss_rpn_loc: 0.1781  time: 25.8626 

## Testing

In [None]:
# Run the trained model over the test images with a score threshold of 0.1 and
# plot the instance overlays; the returned predictor is reused by the evaluation cell.
cfg_path = r"cfg_model.pickle"
predictor = make_inferences(cfg_path, path, microcontroller_metadata, thresh=0.1, bounding_box=True)

In [None]:
# COCO-style evaluation on the registered "category_test" dataset; needs
# `predictor` from the testing cell.
evaluate(cfg_path, predictor)

## Prediction

In [None]:
# Single-image prediction. The image is taken from the training split, so the
# result shows fit on seen data rather than generalisation.
cfg_path = r"cfg_model.pickle"
image_path = r"Images/train/render9701.png"

# `filename` makes the visualizer also save the class map under ./Results/.
prediction(cfg_path, image_path, microcontroller_metadata, filename="render9701.png", bounding_box=True, thresh=0.25)

In [None]:
# Single-image prediction. The image is taken from the training split, so the
# result shows fit on seen data rather than generalisation.
cfg_path = r"cfg_model.pickle"
image_path = r"Images/train/render1467.png"

# `filename` makes the visualizer also save the class map under ./Results/.
prediction(cfg_path, image_path, microcontroller_metadata, filename="render1467.png", bounding_box=True, thresh=0.25)