In [None]:
# Helper commands, used for debugging purposes.
# The detectron2 build only succeeds if the CUDA version is correct.

#!nvidia-smi
#!nvcc --version

#import torch
#torch.__version__
#import torchvision
#torchvision.__version__

!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.7/index.html
!pip install fastai

Below you'll find the extra libraries we'll use in this notebook. More libraries will be added throughout the notebook when needed.

In [None]:
# Base setup:
# detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# common libraries
import numpy as np
import os, json, cv2, random
import matplotlib.pyplot as plt
%matplotlib inline

# detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode

In [None]:
# Locations of the competition data: COCO-format annotation files plus the
# directory that holds the images they refer to.
training_path = "/kaggle/input/dsta-brainhack-2021/c1_release/c1_release"
train_annotation = os.path.join(training_path, "train.json")
val_annotation = os.path.join(training_path, "val.json")
image_path = os.path.join(training_path,"images")



from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog
# The annotations are already in COCO format, so no custom loader function is
# needed: register_coco_instances wires each JSON file to the image directory.
from detectron2.data.datasets import register_coco_instances

# Registering a name twice raises an error, which makes re-running this cell
# fail. Skip names that are already present in the catalog so the cell is
# idempotent. (Restart the kernel if the annotation paths themselves change.)
for _dataset_name, _annotation_file in (
    ("train_data", train_annotation),
    ("val_data", val_annotation),
):
    if _dataset_name not in DatasetCatalog.list():
        register_coco_instances(_dataset_name, {}, _annotation_file, image_path)

<a id=traincustom> </a>
## 4.3. Training with a custom dataset
Let's first check our training data! Of course, we'll use the **Visualizer** class again.

In [None]:


# Visualize training data: draw the ground-truth annotations of three random
# training images to sanity-check the dataset registration.
my_dataset_train_metadata = MetadataCatalog.get("train_data")
dataset_dicts = DatasetCatalog.get("train_data")

my_dataset_val_metadata = MetadataCatalog.get("val_data")
val_dicts = DatasetCatalog.get("val_data")

import random
from detectron2.utils.visualizer import Visualizer
import cv2
import matplotlib.pyplot as plt

for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    # cv2 loads images as BGR; the Visualizer is given the RGB view.
    visualizer = Visualizer(img[:, :, ::-1], metadata=my_dataset_train_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    # One figure per sample: without this every imshow call draws on the same
    # axes and only the last image remains visible.
    plt.figure(figsize=(15, 7))
    # The drawn image is already RGB, which is what matplotlib expects, so it
    # must not be channel-flipped again (that would swap red and blue).
    plt.imshow(vis.get_image())

# 4. Training

In [None]:
# # DATA AUG

# from detectron2.data import transforms as T
# # Define a sequence of augmentations:
# augs = T.AugmentationList([
#     T.RandomBrightness(0.9, 1.1),
#     T.RandomFlip(prob=0.5),
#     T.RandomCrop("absolute", (640, 640))
# ])  # type: T.Augmentation

# # Define the augmentation input ("image" required, others optional):
# input = T.AugInput(image, boxes=boxes, sem_seg=sem_seg)

# # Apply the augmentation:
# transform = augs(input)  # type: T.Transform
# image_transformed = input.image  # new image
# sem_seg_transformed = input.sem_seg  # new semantic segmentation

# # For any extra data that needs to be augmented together, use transform, e.g.:
# image2_transformed = transform.apply_image(image2)
# polygons_transformed = transform.apply_polygons(polygons)

In [None]:
# # Run training

from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator


class CocoTrainer(DefaultTrainer):
    """DefaultTrainer variant whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create the COCO evaluator for ``dataset_name``.

        Args:
            cfg: the training config, forwarded to ``COCOEvaluator``.
            dataset_name: name of the registered dataset to evaluate on.
            output_folder: directory for evaluation artifacts; when omitted,
                a local ``coco_eval`` directory is created and used.

        Returns:
            A ``COCOEvaluator`` built with the distributed flag set to False.
        """
        target_dir = output_folder
        if target_dir is None:
            target_dir = "coco_eval"
            os.makedirs(target_dir, exist_ok=True)
        return COCOEvaluator(dataset_name, cfg, False, target_dir)
    


#### FASTERCNN


In [None]:
# Faster R-CNN (ResNet-101 + FPN) fine-tuning: build the config, create the
# trainer and run training. Statement order matters: the model-zoo file is
# merged first and the assignments below override its values.
# NOTE(review): CfgNode is imported as CN here but is not used in this cell.

from detectron2.config.config import CfgNode as CN

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
# Datasets registered earlier in the notebook.
cfg.DATASETS.TRAIN = ("train_data",)
cfg.DATASETS.TEST = ("val_data",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")  # initialize training from the model-zoo checkpoint
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.MAX_ITER = 40000  # adjust up if val mAP is still rising, adjust down if overfitting
# Learning-rate schedule milestones and decay factor.
# NOTE(review): presumably the LR is multiplied by GAMMA at each iteration in
# STEPS -- confirm against the detectron2 solver documentation.
cfg.SOLVER.STEPS = [10000,20000,30000]
cfg.SOLVER.GAMMA = 0.05
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
# NOTE(review): assumes the dataset has 5 object categories -- verify against train.json.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5
# Periodic evaluation during training is disabled; uncomment to enable it.
# cfg.TEST.EVAL_PERIOD = 1000


os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg)
# NOTE(review): resume=False presumably starts from cfg.MODEL.WEIGHTS instead of
# resuming the last checkpoint in cfg.OUTPUT_DIR -- confirm in the trainer docs.
trainer.resume_or_load(resume=False)

trainer.train()

#### RetinaNet

In [None]:
# cfg = get_cfg()
# cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_1x.yaml"))
# cfg.DATASETS.TRAIN = ("train_data",)
# cfg.DATASETS.TEST = ("val_data",)
# cfg.DATALOADER.NUM_WORKERS = 4
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_50_FPN_1x.yaml")  # Let training initialize from model zoo
# cfg.SOLVER.IMS_PER_BATCH = 4
# cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
# cfg.SOLVER.MAX_ITER = 300    # 300 iterations enough for this dataset; Train longer for a practical dataset
# cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, enough for this dataset (default: 512)
# # cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # classes for RCNN
# cfg.MODEL.RETINANET.NUM_CLASSES = 5 # Classes for Retina
# cfg.TEST.EVAL_PERIOD = 500


# os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# trainer = DefaultTrainer(cfg) 
# trainer.resume_or_load(resume=False)
# trainer.train()

<a id="modelevaluation" ></a>
## 4.4. Model evaluation
Let's check out the performance of our model!

First of all, let's make some predictions! We're going to use the [**DefaultPredictor**](https://detectron2.readthedocs.io/en/latest/modules/engine.html?highlight=DefaultPredictor#detectron2.engine.defaults.DefaultPredictor) class. Of course, we'll use the same cfg that we used during training. We'll change two parameters for inference.

### Loading model

In [None]:
# RELOADING MODEL
# cfg = get_cfg()
# cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# cfg.MODEL.WEIGHTS = os.path.join("/kaggle/input/detectronmodel20k", "model_final (1).pth")  # path to the model we trained
# cfg.DATASETS.TRAIN = ("train_data",)
# cfg.DATASETS.TEST = ("val_data",)
# cfg.DATALOADER.NUM_WORKERS = 4
# cfg.SOLVER.IMS_PER_BATCH = 4
# cfg.SOLVER.BASE_LR = 0.001
# cfg.SOLVER.WARMUP_ITERS = 1000
# cfg.SOLVER.MAX_ITER = 10000 #adjust up if val mAP is still rising, adjust down if overfit
# # cfg.SOLVER.STEPS = [0,20000,40000]
# cfg.SOLVER.GAMMA = 0.05
# cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
# cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5




# Model saved weights.
# After the training cell, cfg.MODEL.WEIGHTS still points at the COCO model-zoo
# checkpoint used for initialisation. Point it at the fine-tuned checkpoint in
# cfg.OUTPUT_DIR, otherwise the predictor would not use the trained model.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# To use a previously uploaded checkpoint instead, override the path, e.g.:
# cfg.MODEL.WEIGHTS = os.path.join("/kaggle/input/objectron-retinanetv1/model_final.pth")

cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a testing threshold
# The predictor loads cfg.MODEL.WEIGHTS when it is constructed, so both
# settings above must be in place before this line.
predictor = DefaultPredictor(cfg)



Notice that by using [**ColorMode.IMAGE_BW**](https://detectron2.readthedocs.io/en/latest/modules/utils.html?highlight=ColorMode#module-detectron2.utils.visualizer) we are able to remove the colors from objects which aren't detected!

In [None]:
from detectron2.utils.visualizer import ColorMode
val_dict = DatasetCatalog.get("val_data")

# Run the predictor on three random validation images and plot the detections.
for d in random.sample(val_dict, 3):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    # instance_mode=IMAGE_BW: remove the colors of unsegmented pixels
    # (only available for segmentation models).
    v = Visualizer(
        im[:, :, ::-1],
        metadata=my_dataset_train_metadata,
        scale=0.5,
        instance_mode=ColorMode.IMAGE_BW,
    )
    predicted_instances = outputs["instances"].to("cpu")
    out = v.draw_instance_predictions(predicted_instances)
    plt.figure(figsize=(15, 7))
    # Two successive channel reversals cancel out, so the image is shown as-is.
    plt.imshow(out.get_image())

Above we can see that our model performs pretty well! Let's now evaluate our custom model with [Evaluators](https://detectron2.readthedocs.io/en/latest/modules/evaluation.html). Two evaluators can be used:
* [**COCOEvaluator**](https://detectron2.readthedocs.io/en/latest/modules/evaluation.html#detectron2.evaluation.COCOEvaluator) can evaluate AP (Average Precision) for box detection, instance segmentation and keypoint detection.
* [**SemSegEvaluator**](https://detectron2.readthedocs.io/en/latest/modules/evaluation.html#detectron2.evaluation.SemSegEvaluator) can evaluate semantic segmentation metrics.

Afterwards we'll use the [**build_detection_test_loader**](https://detectron2.readthedocs.io/en/latest/modules/data.html?highlight=build_detection_test_loader#detectron2.data.build_detection_test_loader) which returns a torch DataLoader, that loads the given detection dataset.

Finally, we'll pass the model, evaluator and dataloader to the [inference_on_dataset](https://detectron2.readthedocs.io/en/latest/modules/evaluation.html#detectron2.evaluation.inference_on_dataset) function. It runs the model on the dataloader and computes the metrics with the evaluator.

In [None]:
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.modeling import build_model


# COCO-style evaluation on the validation split; results are written to ./output/.
evaluator = COCOEvaluator("val_data", None, False, output_dir="./output/")
# evaluator = COCOEvaluator("val_data", ("bbox", "segm"), False, output_dir="./output/")

# Loading model.
# build_model() only creates the architecture with randomly initialised
# parameters, so the trained checkpoint has to be loaded explicitly; otherwise
# the metrics below describe an untrained network.
# Change eval_weights to evaluate a different checkpoint.
eval_weights = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
model_uploaded = build_model(cfg)
DetectionCheckpointer(model_uploaded).load(eval_weights)

val_loader = build_detection_test_loader(cfg, "val_data")
# print(inference_on_dataset(trainer.model, val_loader, evaluator))
print(inference_on_dataset(model_uploaded, val_loader, evaluator))

## Test

In [None]:
# Inference settings for the test images: fine-tuned weights and score threshold.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# cfg.MODEL.WEIGHTS = os.path.join("/kaggle/input/objectron-retinanetv1/model_final.pth")


test_img_path = "/kaggle/input/dsta-brainhack-2021/c1_test_release/c1_test_release/images" # extracted testing images path
cfg.DATASETS.TEST = ("my_dataset_test", )
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set the testing threshold for this model
test_metadata = MetadataCatalog.get("my_dataset_test")

# An existing predictor does not pick up later changes to cfg, so rebuild it
# here; otherwise the weights and threshold set above would have no effect.
predictor = DefaultPredictor(cfg)

from detectron2.utils.visualizer import ColorMode
import glob

# Raw predictor outputs for every test image, kept for later inspection.
# NOTE(review): the outputs are stored as returned by the predictor (possibly
# on the GPU); for a large test set consider storing only what is needed.
ou_test = []
for imageName in glob.glob(os.path.join(test_img_path, '*.jpg')):
    im = cv2.imread(imageName)
    outputs = predictor(im)
    ou_test.append(outputs)
    # cv2 loads BGR; the Visualizer is given the RGB view.
    v = Visualizer(im[:, :, ::-1],
                   metadata=my_dataset_train_metadata,
                   scale=0.8
                   )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # One figure per image (otherwise only the last one stays visible), shown
    # immediately so figures do not pile up in memory.
    plt.figure(figsize=(15, 7))
    # The drawn image is already RGB, which matplotlib expects; do not flip it.
    plt.imshow(out.get_image())
    plt.show()
    
    
    # im = cv2.imread("/kaggle/input/dsta-brainhack-2021/c1_test_release/c1_test_release/images/1.jpg")
# outputs = predictor(im)
# v = Visualizer(im[:, :, ::-1],
#             metadata=my_dataset_train_metadata, 
#             scale=0.8
#              )
# out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# plt.imshow(out.get_image()[:, :, ::-1])


In [None]:
# generate detections on the folder of test images (this will be used for submission)
from PIL import Image, ImageDraw
from torchvision import transforms
from torchvision.ops import batched_nms
from torchvision.transforms import functional as F
import torch

# COCO-style detection records for every test image, written out as the
# submission file.
detections = []

for imageName in glob.glob('/kaggle/input/dsta-brainhack-2021/c1_test_release/c1_test_release/images/*jpg'):
    im = cv2.imread(imageName)
    outputs = predictor(im)
    instances = outputs["instances"]

    pred_labels = instances.pred_classes.tolist()
    pred_boxes_xyxy = instances.pred_boxes.tensor.tolist()
    pred_scores = instances.scores.tolist()

    # The image id is the integer before the first dot of the file name.
    img_id = int(os.path.basename(imageName).split('.')[0])

    for (x1, y1, x2, y2), class_index, confidence in zip(pred_boxes_xyxy, pred_labels, pred_scores):
        detections.append({
            'image_id': img_id,
            # NOTE(review): +1 presumably maps the 0-based predicted class index
            # to 1-based category ids -- confirm against the annotation file.
            'category_id': int(class_index) + 1,
            # Corner format (x1, y1, x2, y2) -> [left, top, width, height],
            # each value truncated to an integer.
            'bbox': [int(x1), int(y1), int(x2 - x1), int(y2 - y1)],
            'score': float(confidence),
        })

test_pred_json = os.path.join("/kaggle/working", "test_pred_2.json")
with open(test_pred_json, 'w') as f:
    json.dump(detections, f)

In [None]:
# Check: load the generated prediction file against the sample test annotation
# file with pycocotools and print the bbox evaluation summary.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

sample_json_path = os.path.join(
    "/kaggle/input/dsta-brainhack-2021/c1_test_release/c1_test_release",
    "c1_test_sample.json",
)

coco_gt = COCO(sample_json_path)
coco_dt = coco_gt.loadRes(test_pred_json)

cocoEval = COCOeval(cocoGt=coco_gt, cocoDt=coco_dt, iouType='bbox')
# The three stages must run in this order.
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

<a id="othermodels" ></a>
# 5. Other models

It's possible to use other high-end object detection models as well. Let's check it out!

<a id="keypoint" ></a>
## 5.1. Keypoint detection

Reload the data.

In [None]:
# !wget http://images.cocodataset.org/val2017/000000282037.jpg -q -O input.jpg
# im = cv2.imread("./input.jpg")

In [None]:
# cfg = get_cfg()   # fresh config
# cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set threshold for this model
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
# predictor = DefaultPredictor(cfg)
# outputs = predictor(im)
# v = Visualizer(im[:,:,::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
# out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# plt.figure(figsize=(15,7))
# plt.imshow(out.get_image()[:, :, ::-1][..., ::-1])

<a id="panoptic" ></a>
## 5.2. Panoptic segmentation

In [None]:
# !wget http://images.cocodataset.org/val2017/000000282037.jpg -q -O input.jpg
# im = cv2.imread("./input.jpg")

In [None]:
# cfg = get_cfg()
# cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml"))
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml")
# predictor = DefaultPredictor(cfg)
# panoptic_seg, segments_info = predictor(im)["panoptic_seg"]
# v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
# out = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"), segments_info)
# plt.figure(figsize=(25,15))
# plt.imshow(out.get_image()[:, :, ::-1][..., ::-1])

<a id="semantic" ></a>
## 5.3. Semantic, Densepose, ...

Will be added in a future version! Stay tuned!

<a id="video" ></a>
# 6. Video

So up until now we've been working with images only. Can we quickly use the models for videos? The answer is YES!

<a id="videolib" ></a>
## 6.1. Libraries
As you can see we actually don't need many other libraries. Lets import a library to handle the video.

In [None]:
# from IPython.display import YouTubeVideo, display, Video # for viewing the video
# !pip install youtube-dl # for downloading the video

<a id="thevideo" ></a>
## 6.2. The video

In [None]:
# #video = YouTubeVideo("ll8TgCZ0plk", width=500)#7HaJArMDKgI
# video = YouTubeVideo("7HaJArMDKgI", width=750, height= 450)#
# display(video)

Downloading the video and cropping the first 10 seconds for processing.


In [None]:
# !youtube-dl https://www.youtube.com/watch?v=7HaJArMDKgI -f 22 -o video.mp4
# !ffmpeg -i video.mp4 -t 00:00:10 -c:v copy video-clip.mp4 

<a id="videoinference" ></a>
## 6.3. Inference on the video
Let's now run a panoptic model over the video above.

*note: For now I'll be using some [demo](https://github.com/facebookresearch/detectron2/tree/master/demo) files, I'll later add the code implementations to this notebook.*

In [None]:
# !git clone https://github.com/facebookresearch/detectron2
# !python detectron2/demo/demo.py --config-file detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml --video-input video-clip.mp4 --confidence-threshold 0.6 --output 1video-output.mkv \
#   --opts MODEL.WEIGHTS detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl

Let's check the result! 

*I've run into some trouble with video encoding in OpenCV and ffmpeg (to be fixed in a future version of this notebook).*

In [None]:
# !git clone https://github.com/vandeveldemaarten/tempdetector2video.git
# Video("./tempdetector2video/myvideo.mkv")

# That's all for now!

Thank you for reading this notebook! If you enjoyed it, please upvote!

*More coming soon!*