# Object Detection

[Detectron2](https://detectron2.readthedocs.io/en/latest/) is a platform for object detection, segmentation and other visual recognition tasks.

* Includes new capabilities such as panoptic segmentation, Densepose, Cascade R-CNN, rotated bounding boxes, PointRend, DeepLab, ViTDet, MViTv2 etc.
* Used as a library to support building research projects on top of it.
* Models can be exported to TorchScript format or Caffe2 format for deployment.

In [None]:
!pip install opencv-python matplotlib

In [None]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

## Detectron2 :: RetinaNetR50 FPN

In [9]:
import detectron2

#https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

import matplotlib.pyplot as plt
import numpy as np
import torch, os, json, cv2, random

In [3]:
# Setup detectron2 logger
from detectron2.utils.logger import setup_logger
setup_logger()

<Logger detectron2 (DEBUG)>

### COCO dataset

In [12]:
# https://cocodataset.org/#home
metadata = MetadataCatalog.get("coco_2017_val")

In [13]:
# Retrieve class names from metadata
class_names = metadata.get("thing_classes")

In [14]:
# Print the predicted class names
print("Classes:", len(class_names))
print("Class Names:", class_names)

Classes: 80
Class Names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


* Classes: 80
* Class Names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

### Create Config

In [95]:
#https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-Detection
cfg_ret50fpn = get_cfg()
cfg_ret50fpn.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_1x.yaml"))
cfg_ret50fpn.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05  # set threshold for this model
cfg_ret50fpn.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_50_FPN_1x.yaml")

Loading config /opt/conda/lib/python3.10/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


### Create predictor

In [96]:
predictor_ret50fpn = DefaultPredictor(cfg_ret50fpn)

[32m[08/25 13:21:43 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/190397773/model_final_bfca0b.pkl ...


The checkpoint state_dict contains keys that are not used by the model:
  [35mpixel_mean[0m
  [35mpixel_std[0m


### Make Prediction

In [97]:
image1 = plt.imread('../datasets/snapshots/Harbin.jpg')
image2 = plt.imread('../datasets/snapshots/Hongkong.jpg')
image3 = plt.imread('../datasets/snapshots/Kathmandu.jpg')
image4 = plt.imread('../datasets/snapshots/Tea.jpg')

In [98]:
pred1 = predictor_ret50fpn(image1)
pred2 = predictor_ret50fpn(image2)
pred3 = predictor_ret50fpn(image3)
pred4 = predictor_ret50fpn(image4)

In [129]:
print(pred1)

{'instances': Instances(num_instances=100, image_height=1262, image_width=1920, fields=[pred_boxes: Boxes(tensor([[1.1689e+03, 5.7320e+02, 1.4864e+03, 1.2534e+03],
        [1.5843e+03, 5.4501e+02, 1.8076e+03, 9.7378e+02],
        [4.9123e+02, 5.6845e+02, 6.4869e+02, 1.0137e+03],
        [1.0124e+03, 5.1395e+02, 1.1096e+03, 5.8563e+02],
        [8.0666e+01, 5.0960e+02, 1.9920e+02, 7.8927e+02],
        [6.2540e+02, 5.2091e+02, 6.9306e+02, 7.1227e+02],
        [1.2250e+03, 4.9066e+02, 1.4520e+03, 6.1863e+02],
        [6.7986e+02, 5.3053e+02, 8.7354e+02, 6.3220e+02],
        [8.8426e+02, 5.2187e+02, 1.0007e+03, 6.0515e+02],
        [1.0952e+03, 5.0415e+02, 1.2185e+03, 5.8415e+02],
        [2.1672e+02, 5.1427e+02, 2.8334e+02, 6.5330e+02],
        [1.2033e+03, 5.1054e+02, 1.2439e+03, 6.1989e+02],
        [3.9366e+02, 5.1759e+02, 4.7106e+02, 7.2698e+02],
        [8.9200e+02, 4.2848e+02, 9.8985e+02, 5.2964e+02],
        [4.1123e+01, 5.1921e+02, 8.4309e+01, 6.3925e+02],
        [1.4999e+03, 2.2

### Visualize Results

In [99]:
visualized_pred1 = Visualizer(image1[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred1 = visualized_pred1.draw_instance_predictions(pred1["instances"].to("cpu"))

visualized_pred2 = Visualizer(image2[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred2 = visualized_pred2.draw_instance_predictions(pred2["instances"].to("cpu"))

visualized_pred3 = Visualizer(image3[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred3 = visualized_pred3.draw_instance_predictions(pred3["instances"].to("cpu"))

visualized_pred4 = Visualizer(image4[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred4 = visualized_pred4.draw_instance_predictions(pred4["instances"].to("cpu"))

In [None]:
plt.figure(figsize=(24, 14))

ax = plt.subplot(2, 2, 1)
plt.title('Harbin')
plt.imshow(visualized_pred1.get_image()[:, :, ::-1])
plt.axis("off")
ax = plt.subplot(2, 2, 2)
plt.title('Hongkong')
plt.imshow(visualized_pred2.get_image()[:, :, ::-1])
plt.axis("off")
ax = plt.subplot(2, 2, 3)
plt.title('Kathmandu')
plt.imshow(visualized_pred3.get_image()[:, :, ::-1])
plt.axis("off")
ax = plt.subplot(2, 2, 4)
plt.title('Shenzhen')
plt.imshow(visualized_pred4.get_image()[:, :, ::-1])
plt.axis("off")

plt.savefig("./assets/Object_Detection_Detectron2_01.webp", bbox_inches='tight')

![Detectron2 :: RetinaNetR50 FPN](./assets/Object_Detection_Detectron2_01.webp)

In [101]:
# Get the predicted class labels for each instance in the image1
pred4_classes = pred4["instances"].pred_classes.tolist()
# Map the predicted class labels to class names
pred4_class_names = [class_names[class_id] for class_id in pred4_classes]

In [102]:
print("Predicted Class Names:", pred4_class_names)

Predicted Class Names: ['cup', 'teddy bear', 'dining table', 'chair', 'umbrella', 'backpack', 'backpack', 'chair', 'chair', 'backpack', 'handbag', 'bowl', 'handbag', 'backpack', 'couch', 'fire hydrant', 'handbag', 'handbag', 'motorcycle', 'chair', 'spoon', 'chair', 'chair', 'chair', 'couch', 'dining table', 'chair', 'couch', 'chair', 'bowl', 'cup', 'spoon', 'fire hydrant', 'suitcase', 'chair', 'chair', 'couch', 'dining table', 'couch', 'teddy bear', 'backpack', 'spoon', 'backpack', 'chair', 'chair', 'cake', 'spoon', 'chair', 'umbrella', 'spoon', 'suitcase', 'dining table', 'bench', 'handbag', 'hot dog', 'person', 'chair', 'chair', 'bottle', 'motorcycle', 'dining table', 'surfboard', 'chair', 'dog', 'cup', 'bed', 'fire hydrant', 'bench', 'spoon', 'chair', 'dining table', 'person', 'cup', 'motorcycle', 'dining table', 'handbag', 'dining table', 'backpack', 'person', 'book', 'dog', 'spoon', 'traffic light', 'spoon', 'car', 'cup', 'chair', 'chair', 'umbrella', 'potted plant', 'umbrella', '

In [103]:
print(pred4["instances"].pred_boxes)

Boxes(tensor([[7.5032e+02, 8.3579e+02, 1.0165e+03, 1.0919e+03],
        [4.5623e+02, 3.1689e+02, 8.1636e+02, 7.4694e+02],
        [0.0000e+00, 6.1417e+02, 1.3399e+03, 1.1833e+03],
        [7.2032e+02, 1.0723e+02, 8.9391e+02, 4.4559e+02],
        [3.2083e+02, 3.0528e+01, 7.7024e+02, 2.1227e+02],
        [8.4085e+02, 3.9479e+02, 1.3105e+03, 6.9813e+02],
        [7.5880e+02, 2.2533e+02, 1.3731e+03, 7.2955e+02],
        [7.8558e+02, 2.2329e+02, 1.3726e+03, 6.8365e+02],
        [1.1860e+03, 2.5116e+01, 1.2779e+03, 2.7582e+02],
        [8.1013e+02, 4.0019e+02, 9.8632e+02, 5.8373e+02],
        [8.1013e+02, 4.0019e+02, 9.8632e+02, 5.8373e+02],
        [8.2481e+02, 7.2680e+02, 1.1324e+03, 8.6498e+02],
        [7.4434e+02, 2.8724e+02, 1.3580e+03, 7.3182e+02],
        [6.8704e+02, 3.9568e+02, 1.0127e+03, 6.0175e+02],
        [7.8022e+02, 2.1769e+02, 1.3776e+03, 7.3585e+02],
        [1.5722e+02, 6.5533e+02, 4.3857e+02, 1.0510e+03],
        [6.8969e+02, 4.0100e+02, 1.0348e+03, 6.1360e+02],
        

## Detectron2 :: Faster RCNN R101 FPN

In [109]:
#https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-Detection
cfg_frcnn101fpn = get_cfg()
cfg_frcnn101fpn.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg_frcnn101fpn.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg_frcnn101fpn.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")

In [110]:
predictor_frcnn101fpn = DefaultPredictor(cfg_frcnn101fpn)

[32m[08/25 13:29:19 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl ...


In [111]:
pred5 = predictor_frcnn101fpn(image1)
pred6 = predictor_frcnn101fpn(image2)
pred7 = predictor_frcnn101fpn(image3)
pred8 = predictor_frcnn101fpn(image4)

In [112]:
visualized_pred5 = Visualizer(image1[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred5 = visualized_pred5.draw_instance_predictions(pred5["instances"].to("cpu"))

visualized_pred6 = Visualizer(image2[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred6 = visualized_pred6.draw_instance_predictions(pred6["instances"].to("cpu"))

visualized_pred7 = Visualizer(image3[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred7 = visualized_pred7.draw_instance_predictions(pred7["instances"].to("cpu"))

visualized_pred8 = Visualizer(image4[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
visualized_pred8 = visualized_pred8.draw_instance_predictions(pred8["instances"].to("cpu"))

In [None]:
plt.figure(figsize=(24, 14))

ax = plt.subplot(2, 2, 1)
plt.title('Harbin')
plt.imshow(visualized_pred5.get_image()[:, :, ::-1])
plt.axis("off")
ax = plt.subplot(2, 2, 2)
plt.title('Hongkong')
plt.imshow(visualized_pred6.get_image()[:, :, ::-1])
plt.axis("off")
ax = plt.subplot(2, 2, 3)
plt.title('Kathmandu')
plt.imshow(visualized_pred7.get_image()[:, :, ::-1])
plt.axis("off")
ax = plt.subplot(2, 2, 4)
plt.title('Shenzhen')
plt.imshow(visualized_pred8.get_image()[:, :, ::-1])
plt.axis("off")

plt.savefig("../assets/Object_Detection_Detectron2_02.webp", bbox_inches='tight')

![Detectron2 :: Faster RCNN R101 FPN](./assets/Object_Detection_Detectron2_02.webp)

In [117]:
# Get the predicted class labels for each instance in the image1
pred8_classes = pred8["instances"].pred_classes.tolist()
# Map the predicted class labels to class names
pred8_class_names = [class_names[class_id] for class_id in pred8_classes]

In [118]:
print("Predicted Class Names:", pred8_class_names)

Predicted Class Names: ['cup', 'teddy bear', 'dining table', 'chair', 'backpack', 'chair', 'bowl']


In [119]:
print(pred8["instances"].pred_boxes)

Boxes(tensor([[ 749.6494,  837.2590, 1029.4165, 1090.3832],
        [ 446.9825,  315.1858,  796.4763,  810.3347],
        [   0.0000,  596.0012, 1343.7590, 1174.9941],
        [ 710.6813,  115.2629,  896.0451,  422.0094],
        [ 708.7178,  384.5089, 1328.7939,  706.4788],
        [ 780.0182,  225.4701, 1379.0797,  620.4463],
        [ 817.5123,  729.8907, 1131.7351,  870.5685]], device='cuda:0'))
