In [1]:
# Install detectron2 from the prebuilt wheel index for CUDA 10.1 / torch 1.8 (see the URL).
# NOTE(review): no version is pinned, and the wheel index presumably has to match the
# torch/CUDA build of the runtime - confirm before re-running on a fresh machine.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
# Ends the current Python process; presumably so the runtime restarts and picks up
# the freshly installed package - confirm.
exit(0)

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html


In [1]:
# Show the current working directory before Drive is mounted.
!pwd

/content


In [None]:
# Mount Google Drive at /content/drive; the next cell changes into a project
# directory located under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
# Change into the project directory on the mounted Drive. Later cells use relative
# paths ("data", "trained_model", "data/test_images"), which resolve from here.
cd /content/drive/MyDrive/rohit-tech-work/segmentation_detectron2/instance_segmentation_detectron2

/content/drive/.shortcut-targets-by-id/1ONKzZRQrdbwZEGFgDw8qOg1GJVE828hi/rohit-tech-work/segmentation_detectron2/instance_segmentation_detectron2


In [3]:
# Confirm the working directory after the `cd` above.
!pwd

/content/drive/.shortcut-targets-by-id/1ONKzZRQrdbwZEGFgDw8qOg1GJVE828hi/rohit-tech-work/segmentation_detectron2/instance_segmentation_detectron2


In [10]:
import torch
import torchvision
import detectron2
from detectron2.utils.logger import setup_logger
import numpy as np
import os
import json
import cv2
import random
from google.colab.patches import cv2_imshow
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.visualizer import ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data import DatasetMapper, build_detection_train_loader
from detectron2.data import DatasetCatalog
from detectron2.engine import DefaultTrainer
from PIL import Image


def get_dataset(JSON_PATH):
    """Read a JSON data file and return its decoded content.

    Used as the loader when registering a dataset with detectron2.

    Args:
        JSON_PATH (str): path to the JSON data file.

    Returns:
        The object decoded from the file. Callers in this file treat it as
        detectron2's list[dict] dataset format; that shape is not validated
        here.
    """
    with open(JSON_PATH) as json_file:
        raw_text = json_file.read()
    return json.loads(raw_text)


def register_dataset(dataset_name, input_json_path, thing_classes):
    """Register a JSON-backed dataset with detectron2 and return its metadata.

    Args:
            dataset_name (str): name under which the dataset is registered.
            input_json_path (str): path to the JSON file in detectron2 format.
                The file is not opened here; it is read when the registered
                loader is called.
            thing_classes (list[str]): class names stored on the dataset's
                metadata entry.

    Returns:
            The MetadataCatalog entry for ``dataset_name``.
    """

    def _load_records():
        # Deferred read: only runs when the catalog invokes the loader.
        return get_dataset(input_json_path)

    DatasetCatalog.register(dataset_name, _load_records)

    metadata = MetadataCatalog.get(dataset_name)
    metadata.set(thing_classes=thing_classes)

    return metadata


def create_config(config_dict):
    """Build the detectron2 config used for model training and inference.

    Args:
            config_dict (dict): settings to apply. Keys read here:
                "model_yaml_file", "DEVICE", "model_out_dir",
                "train_dataset_name", "test_dataset_name", "NUM_WORKERS",
                "IMS_PER_BATCH", "BASE_LR", "MAX_ITER", "NUM_CLASSES" and,
                optionally, "BATCH_SIZE_PER_IMAGE" (defaults to 512).
                "test_dataset_name" may be a single name, a sequence of
                names, an empty tuple, or None.

    Returns:
            The config object created by ``get_cfg()`` with the settings applied.
    """

    model_yaml_file = config_dict["model_yaml_file"]

    cfg = get_cfg()
    # Load the model-zoo defaults first so that every explicit setting below
    # takes precedence over whatever the YAML file defines.
    cfg.merge_from_file(model_zoo.get_config_file(model_yaml_file))

    cfg.MODEL.DEVICE = config_dict["DEVICE"]  # 'cpu' or 'cuda'
    cfg.OUTPUT_DIR = config_dict["model_out_dir"]
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_yaml_file)

    cfg.DATASETS.TRAIN = (config_dict["train_dataset_name"],)

    # "(value)" is just a parenthesised value, not a tuple, so a single
    # dataset name given as a string must be wrapped explicitly; otherwise
    # DATASETS.TEST would end up holding a bare string.
    test_datasets = config_dict["test_dataset_name"]
    if test_datasets is None:
        test_datasets = ()
    elif isinstance(test_datasets, str):
        test_datasets = (test_datasets,)
    cfg.DATASETS.TEST = tuple(test_datasets)

    cfg.DATALOADER.NUM_WORKERS = config_dict["NUM_WORKERS"]

    cfg.SOLVER.IMS_PER_BATCH = config_dict["IMS_PER_BATCH"]
    cfg.SOLVER.BASE_LR = config_dict["BASE_LR"]
    cfg.SOLVER.MAX_ITER = config_dict["MAX_ITER"]
    cfg.SOLVER.STEPS = []  # empty step list: no scheduled LR steps

    # 512 was previously hard-coded; it stays the default when the key is absent.
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = config_dict.get("BATCH_SIZE_PER_IMAGE", 512)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = config_dict["NUM_CLASSES"]

    return cfg


def train_model(cfg):
    """Train the segmentation model described by ``cfg``.

    Creates ``cfg.OUTPUT_DIR`` if it does not exist, builds a
    ``DefaultTrainer``, calls ``resume_or_load(resume=True)`` on it and then
    runs training.

    Args:
            cfg: detectron2 config object for the run.
    """

    output_dir = cfg.OUTPUT_DIR
    os.makedirs(output_dir, exist_ok=True)

    model_trainer = DefaultTrainer(cfg)
    model_trainer.resume_or_load(resume=True)
    model_trainer.train()


def get_predictor(cfg, score_thresh=0.5):
    """Create a predictor that uses the trained weights in ``cfg.OUTPUT_DIR``.

    Note that ``cfg`` is modified in place: ``MODEL.WEIGHTS`` is pointed at
    ``<OUTPUT_DIR>/model_final.pth`` and ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``
    is set to ``score_thresh``.

    Args:
            cfg: detectron2 config object; updated in place as described above.
            score_thresh (float): value written to
                ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``. Defaults to 0.5, the
                value that was previously hard-coded.

    Returns:
            A ``DefaultPredictor`` built from the updated config.
    """

    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
    predictor = DefaultPredictor(cfg)

    return predictor


def run_inference(predictor, metadata, input_json_path):
    """Run the predictor on every image listed in a data JSON and display the results.

    Images that cannot be read are reported and skipped, so one bad path does
    not abort the whole run.

    Args:
            predictor: callable that takes an image array and returns a dict
                with an "instances" entry.
            metadata: metadata of the registered dataset, passed to the Visualizer.
            input_json_path (str): path to the data JSON file; each record
                must provide a "file_name" key.
    """

    dataset_dicts = get_dataset(input_json_path)
    for record in dataset_dicts:
        image_path = record["file_name"]
        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # instead of raising, which would otherwise fail inside the predictor.
            print("Could not read image, skipping: " + str(image_path))
            continue

        outputs = predictor(im)

        # [:, :, ::-1] reverses the channel axis, both for the Visualizer
        # input and again for the image handed to cv2_imshow.
        v = Visualizer(im[:, :, ::-1], metadata=metadata, scale=2, instance_mode=ColorMode.IMAGE_BW)
        out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        cv2_imshow(out.get_image()[:, :, ::-1])



# The block below is a bare string literal, so none of the code inside it runs.
# NOTE(review): it reads `pred_keypoints`, which looks like a leftover from a
# keypoint-detection variant of this notebook - confirm, then re-enable or delete.
'''
def get_predicted_keypoints(cfg, image_file_bytes):
    """[summary]

    Args:
            cfg ([type]): [config file]
            image_file ([_io.BytesIO object]): [image bytes to get predictions on]

    Returns:
            [tensor]]: [the keypoint predictions - one tensor for each image, Tensor of shape (N, num_keypoint, 3). Each row in the last dimension is (x, y, score). Confidence scores are larger than 0]
    """

    predictor = get_predictor(cfg)
    image = Image.open(image_file_bytes).convert("RGB")
    image.shape = image.size
    image = np.asanyarray(image)
    #im = cv2.imread(d["file_name"])
    output = predictor(image)
    #outputs.append(output["instances"].pred_keypoints)

    return output["instances"].pred_keypoints

'''



if __name__ == '__main__':
    # End-to-end driver: register the dataset, build the config, train, then
    # run inference on the training data. The names `config_dict`, `metadata`,
    # `config` and `predictor` are left in the notebook namespace and are
    # used by the following cell.

    config_dict = {
        "DEVICE" : "cuda", #"cpu" or "cuda"
        "model_out_dir": "trained_model",
        "model_yaml_file": "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml",
        "data_base_dir": "data",
        "images_base_dir": "data/images",
        "train_dataset_name": "instance_segmentation_test1",
        "test_dataset_name": (),
        "input_json_path": "detectron2_format_instance_segmentation_data.json",
        "NUM_WORKERS": 2,
        "IMS_PER_BATCH": 2,
        "BASE_LR": 0.00025,
        "MAX_ITER": 5000,
        # NOTE(review): NUM_CLASSES is 3 while only two thing_classes are
        # listed below - confirm which one matches the annotations. Left
        # unchanged because it determines the shape of any saved checkpoint.
        "NUM_CLASSES": 3,
        "thing_classes": ["Dog", "Cat"]
    }

    # The same data file is used for registration and for inference below.
    dataset_json_path = os.path.join(config_dict["data_base_dir"], config_dict["input_json_path"])

    # Register the dataset
    # if already registered load the metadata

    print("\n Starting dataset Registration\n")
    # Check the catalog explicitly instead of wrapping the registration in a
    # bare `except:`, which would also hide unrelated failures.
    if config_dict["train_dataset_name"] in DatasetCatalog.list():
        # for metadata from MetaDataCatalog
        metadata = MetadataCatalog.get(config_dict["train_dataset_name"])
        print("\n Loaded metadata from already registered dataset\n")
    else:
        # for metadata by registering new dataset
        metadata = register_dataset(
            dataset_name=config_dict["train_dataset_name"],
            input_json_path=dataset_json_path,
            thing_classes=config_dict["thing_classes"],
        )
        print("\n Registered New Dataset\n")

    # create the cfg file for detectron2
    print("\n Creating config\n")
    config = create_config(config_dict)

    # create the model directory (exist_ok must be a real boolean; the string
    # 'true' only worked because any non-empty string is truthy)
    os.makedirs(config_dict["model_out_dir"], exist_ok=True)

    # train the model
    print("\n Initiate Training\n")
    train_model(config)
    print("\n Training Finished!!!\n")

    # get the predictor
    predictor = get_predictor(config)

    # run inference on the data
    run_inference(predictor, metadata, input_json_path=dataset_json_path)



Output hidden; open in https://colab.research.google.com to view.

In [11]:
# Visualise predictions for the ten test images dc1.jpg ... dc10.jpg.
# Relies on `predictor` and `metadata` created by the previous cell.
for i in range(1,11):
    filename = "data/test_images/dc"+str(i)+".jpg"
    print(filename)
    print()

    im = cv2.imread(filename)
    if im is None:
        # cv2.imread returns None for a missing/unreadable file instead of
        # raising; skip it so the remaining images are still processed.
        print("Could not read image, skipping: " + filename)
        print()
        continue

    outputs = predictor(im)

    # [:, :, ::-1] reverses the channel axis for the Visualizer and again for display.
    v = Visualizer(im[:, :, ::-1], metadata=metadata, scale=2, instance_mode=ColorMode.IMAGE_BW)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])
    print()


Output hidden; open in https://colab.research.google.com to view.