# Detectron2 Beginner's Tutorial

<img src="https://dl.fbaipublicfiles.com/detectron2/Detectron2-Logo-Horz.png" width="500">

Welcome to detectron2! This is the official colab tutorial of detectron2. Here, we will go through some basics usage of detectron2, including the following:
* Run inference on images or videos, with an existing detectron2 model
* Train a detectron2 model on a new dataset

You can make a copy of this tutorial by "File -> Open in playground mode" and make changes there. __DO NOT__ request access to this tutorial.


# Install detectron2

In [1]:
# !python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
# !git clone 'https://github.com/facebookresearch/detectron2'
# dist = distutils.core.run_setup("./detectron2/setup.py")
# !python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [2]:
import torch, detectron2
# !nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

  from .autonotebook import tqdm as notebook_tqdm


torch:  1.13 ; cuda:  cu117
detectron2: 0.6


In [3]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
# from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [4]:
def cv2_imshow(im, timeout=100):
    cv2.imshow("sample", im)
    cv2.waitKey(timeout)
    cv2.destroyAllWindows()

In [5]:
FIRST_TIME = False
DATA_PATH = "../data/"
DATA_PATH_BASE = DATA_PATH + "Waste_Bin_Detection_Dataset/"

TRAIN_NAME_ORIGINAL = "sunny_2021_03_23_14_33_cam5_filtered_detection_ground_truth_labels(training dataset).json"
TRAIN_NAME_ORIGINAL = "sunny_reduced_static"
# TRAIN_NAME_REDUCED = "sunny_2021_03_23_14_33_cam5_filtered_detection_ground_truth_labels(training dataset)_reduced.json"
TRAIN_NAME_REDUCED = "sunny_reduced_static.json"
TRAIN_NAME_GENERATED = "2021_03_25_14_04/merged_labels.json"
TRAIN_NAME_CLEANED = "2021_03_25_14_04/cleaned.json" # merged_labels_modified.json"
TRAIN_SNOWY_CLEANED_NAME = "2022_01_21_14_04/cleaned.json"
TRAIN_SNOWY2_CLEANED_NAME = "new/2021_02_18_06_28/cleaned.json" # 2021_02_18_06_28_unreduced_clean.json"
TRAIN_SNOWY_NAME = "2022_01_21_14_04/merged_labels_55.json" # "2022_01_21_14_04/reduced1.json"
TRAIN_SNOWY2_NAME = "new/2021_02_18_06_28/merged_labels_78.json" #"new/2021_02_18_06_28/merged_labels.json"
TRAIN_SNOWY_NAME_PREV = "2022_01_21_14_04/reduced1.json"
TRAIN_SNOWY2_NAME_PREV = "new/2021_02_18_06_28/merged_labels.json"
TRAIN_SNOWY_CLEANED_NAME_PREV = "2022_01_21_14_04/cleaned.json"
TRAIN_SNOWY2_CLEANED_NAME_PREV = "new/2021_02_18_06_28/2021_02_18_06_28_unreduced_clean.json"


# VAL_NAME = "detection_validation_dataset_ground_truth_labels(balanced).json"
VAL_NAME = "validation_set_static_gt.json"
# TEST_NAME = "detection_test_dataset_ground_truth_labels(balanced).json"
TEST_NAME = "test_set_static_gt.json"
TEST_SNOWY_DAY_NAME = "snowy_day.json"
TEST_SNOWY_NIGHT_NAME = "snowy_night.json"
TEST_RB_NAME = "rb.json"

TRAIN_PATH_GENERATED = DATA_PATH + "generated_trainings_data/"
TRAIN_PATH_ORIGINAL = DATA_PATH_BASE + "sunny_2021_03_23_14_33_cam5_filtered (training dataset images and ground truths)/"
VAL_PATH = DATA_PATH_BASE + "cloudy_2021_04_09_16_02_cam5_filtered (validation and test dataset images and ground truths)/"
TEST_PATH = VAL_PATH
TEST_SNOWY_PATH = DATA_PATH + "/test_datasets/snowy/"
TEST_RB_PATH = DATA_PATH + "/test_datasets/rb/"

all_inferences = {}
# VAL_PATH = DATA_PATH_BASE + "cloudy_2021_04_09_16_02_cam5_filtered (validation and test dataset images and ground truths)/"

In [6]:
from detectron2.data.datasets import register_coco_instances

generated_data_ad = ("new/2021_02_18_06_28/", "2021_03_25_14_04/", "new/2021_04_05_14_35/", "new/2021_05_18_14_02/", "new/2021_06_05_12_08/", "new/2021_07_07_06_41/", "new/2021_08_09_06_30/", "new/2021_09_15_06_28/", "new/2021_10_15_18_16/", "new/2021_11_02_12_59/", "new/2021_12_15_12_54/", "2022_01_21_14_04/")
generated_data_ad_path = "../data/generated_data_ad/"
registered_generated_data_ad = []

for dataset in generated_data_ad:
    data_name = dataset.split("/")[-2]
    registered_generated_data_ad.append(data_name)
    print("ds_name: ", data_name)
    register_coco_instances(data_name, {}, generated_data_ad_path + dataset + "merged_labels.json", generated_data_ad_path + dataset)



ds_name:  2021_02_18_06_28
ds_name:  2021_03_25_14_04
ds_name:  2021_04_05_14_35
ds_name:  2021_05_18_14_02
ds_name:  2021_06_05_12_08
ds_name:  2021_07_07_06_41
ds_name:  2021_08_09_06_30
ds_name:  2021_09_15_06_28
ds_name:  2021_10_15_18_16
ds_name:  2021_11_02_12_59
ds_name:  2021_12_15_12_54
ds_name:  2022_01_21_14_04


In [7]:


# registrate the snowy_prev datatsets
register_coco_instances("dataset_train_snowy_prev2", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY_NAME_PREV, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_snowy2_prev2", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY2_NAME_PREV, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_snowy_clean_prev2", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY_CLEANED_NAME_PREV, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_snowy2_clean_prev2", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY2_CLEANED_NAME_PREV, TRAIN_PATH_GENERATED)

# register_coco_instances("dataset_train_snowy_prev", {}, DATA_PATH + TRAIN_SNOWY_NAME_PREV, TRAIN_SNOWY_PATH)

In [8]:
# register_coco_instances("dataset_train_reduced2", {}, TRAIN_PATH_ORIGINAL + TRAIN_NAME_REDUCED, TRAIN_PATH_ORIGINAL)


In [9]:
def split_into_train_val_test(label_path, train_split, val_split, test_split):
    train_name = label_path[:-5] + "_train.json"
    val_name = label_path[:-5] + "_val.json"
    test_name = label_path[:-5] + "_test.json"

    # check if the splits already exist
    if os.path.exists(train_name) and os.path.exists(val_name) and os.path.exists(test_name):
        print("Splits already exist!")
        return train_name, val_name, test_name

    with open(label_path) as f:
        gt_labels = json.load(f)
    
    # get the image ids
    image_ids = [img["id"] for img in gt_labels["images"]]
    num_images = len(image_ids)
    num_train = int(num_images * train_split)
    num_val = int(num_images * val_split)
    num_test = int(num_images * test_split)
    train_split = image_ids[:num_train]
    val_split = image_ids[num_train:num_train+num_val]
    test_split = image_ids[num_train+num_val:]
    print("Num train: ", len(train_split))
    print("Num val: ", len(val_split))
    print("Num test: ", len(test_split))
    # save the splits to a json file
    train_split_dict = {"images": [], "annotations": []}
    val_split_dict = {"images": [], "annotations": []}
    test_split_dict = {"images": [], "annotations": []}
    for img in gt_labels["images"]:
        if img["id"] in train_split:
            train_split_dict["images"].append(img)
        elif img["id"] in val_split:
            val_split_dict["images"].append(img)
        elif img["id"] in test_split:
            test_split_dict["images"].append(img)
        else:
            print("Image id not found in any split!")
    for ann in gt_labels["annotations"]:
        if ann["image_id"] in train_split:
            train_split_dict["annotations"].append(ann)
        elif ann["image_id"] in val_split:
            val_split_dict["annotations"].append(ann)
        elif ann["image_id"] in test_split:
            test_split_dict["annotations"].append(ann)
        else:
            print("Annotation image id not found in any split!")

    # add the other data required for coco format
    train_split_dict["info"] = gt_labels["info"]
    train_split_dict["licenses"] = gt_labels["licenses"]
    train_split_dict["categories"] = gt_labels["categories"]
    val_split_dict["info"] = gt_labels["info"]
    val_split_dict["licenses"] = gt_labels["licenses"]
    val_split_dict["categories"] = gt_labels["categories"]
    test_split_dict["info"] = gt_labels["info"]
    test_split_dict["licenses"] = gt_labels["licenses"]
    test_split_dict["categories"] = gt_labels["categories"]

    # assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds()))
    # asser that the set of image ids in the annotations is the same as the set of image ids in the images
    img_ids_train = set([img["id"] for img in train_split_dict["images"]])
    annot_img_ids_train = set([annot["image_id"] for annot in train_split_dict["annotations"]])
    assert annot_img_ids_train == annot_img_ids_train & img_ids_train
    img_ids_val = set([img["id"] for img in val_split_dict["images"]])
    annot_img_ids_val = set([annot["image_id"] for annot in val_split_dict["annotations"]])
    assert annot_img_ids_val == annot_img_ids_val & img_ids_val
    img_ids_test = set([img["id"] for img in test_split_dict["images"]])
    annot_img_ids_test = set([annot["image_id"] for annot in test_split_dict["annotations"]])
    assert annot_img_ids_test == annot_img_ids_test & img_ids_test
    
    # save the splits to a json file
    with open(train_name, "w") as f:
        json.dump(train_split_dict, f)
    with open(val_name, "w") as f:
        json.dump(val_split_dict, f)
    with open(test_name, "w") as f:
        json.dump(test_split_dict, f)

    return train_name, val_name, test_name

# label_path = "../data/Waste_Bin_Detection_Dataset/sunny_2021_03_23_14_33_cam5_filtered (training dataset images and ground truths)/"
# label_path += "sunny_reduced_static.json"
# split_into_train_val_test(label_path, 0.6, 0.2, 0.2)

In [10]:
# split the sunny, cloudy, snowy and rb datasets into train, val, and test
dataset_splits = {TRAIN_PATH_ORIGINAL + TRAIN_NAME_REDUCED: (0.5, 0.25, 0.25),
                VAL_PATH + VAL_NAME: (1.0, 0, 0), TEST_PATH + TEST_NAME: (0.0, 0.5, 0.5),
                TEST_SNOWY_PATH + TEST_SNOWY_DAY_NAME: (0.5, 0.25, 0.25),
                TEST_SNOWY_PATH + TEST_SNOWY_NIGHT_NAME: (0.5, 0.25, 0.25),
                TEST_RB_PATH + TEST_RB_NAME: (0.5, 0.25, 0.25)}
dataset_names = ["sunny", "cloudy_train", "cloudy_val", "snowy_day", "snowy_night", "rb"]
dataset_paths = [TRAIN_PATH_ORIGINAL, VAL_PATH, TEST_PATH, TEST_SNOWY_PATH, TEST_SNOWY_PATH, TEST_RB_PATH]
for i, p in enumerate(dataset_splits.keys()):
    train_name, val_name, test_name = split_into_train_val_test(p, dataset_splits[p][0], dataset_splits[p][1], dataset_splits[p][2])
    register_coco_instances(f"{dataset_names[i]}_train", {}, train_name, dataset_paths[i])
    register_coco_instances(f"{dataset_names[i]}_val", {}, val_name, dataset_paths[i])
    register_coco_instances(f"{dataset_names[i]}_test", {}, test_name, dataset_paths[i])


Splits already exist!
Splits already exist!
Splits already exist!
Splits already exist!
Splits already exist!
Splits already exist!


In [11]:
# if your dataset is in COCO format, this cell can be replaced by the following three lines:
from detectron2.data.datasets import register_coco_instances

register_coco_instances("dataset_train_original", {}, TRAIN_PATH_ORIGINAL + TRAIN_NAME_ORIGINAL, TRAIN_PATH_ORIGINAL)
register_coco_instances("dataset_train_generated", {}, TRAIN_PATH_GENERATED + TRAIN_NAME_GENERATED, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_generated_cleaned", {}, TRAIN_PATH_GENERATED + TRAIN_NAME_CLEANED, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_reduced", {}, TRAIN_PATH_ORIGINAL + TRAIN_NAME_REDUCED, TRAIN_PATH_ORIGINAL)

register_coco_instances("dataset_train_snowy", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY_NAME, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_snowy2", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY2_NAME, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_snowy_clean", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY_CLEANED_NAME, TRAIN_PATH_GENERATED)
register_coco_instances("dataset_train_snowy2_clean", {}, TRAIN_PATH_GENERATED + TRAIN_SNOWY2_CLEANED_NAME, TRAIN_PATH_GENERATED)

register_coco_instances("dataset_val", {}, VAL_PATH + VAL_NAME, VAL_PATH)
register_coco_instances("dataset_test", {}, TEST_PATH + TEST_NAME, TEST_PATH)

register_coco_instances("dataset_snowy_day", {}, TEST_SNOWY_PATH + TEST_SNOWY_DAY_NAME, TEST_SNOWY_PATH)
register_coco_instances("dataset_snowy_night", {}, TEST_SNOWY_PATH + TEST_SNOWY_NIGHT_NAME, TEST_SNOWY_PATH)
register_coco_instances("dataset_rb", {}, TEST_RB_PATH + TEST_RB_NAME, TEST_RB_PATH)

from detectron2.structures import BoxMode

In [12]:
register_coco_instances("dataset_rb2", {}, TEST_RB_PATH + TEST_RB_NAME, TEST_RB_PATH)

# Run a pre-trained detectron2 model

We first download an image from the COCO dataset:

In [13]:
#!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O input.jpg
#im = cv2.imread("./input.jpg")

# im = cv2.imread(TRAIN_PATH + "/images/camera5_1616524512_925074188.jpg")
im = cv2.imread(TRAIN_PATH_GENERATED + "/2021_03_25_14_04/images/camera5/1616697195_746899928.jpg")

# cv2.imshow("sample", im)
# cv2.waitKey(10000)
# cv2.destroyAllWindows()

# cv2_imshow(im)

Then, we create a detectron2 config and a detectron2 `DefaultPredictor` to run inference on this image.

In [14]:
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.005  # set threshold for this model

# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
outputs = predictor(im)

[32m[06/09 04:55:06 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl ...


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [15]:
# look at the outputs. See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format for specification
print(outputs["instances"].pred_classes)
print(outputs["instances"].pred_boxes)

tensor([ 2,  2,  2,  2, 11,  2,  2,  2,  2,  2,  2,  2,  2,  2,  7,  2,  2,  2,
         0,  9], device='cuda:0')
Boxes(tensor([[266.4833, 180.8726, 317.3206, 211.3985],
        [223.7761, 189.1363, 266.9907, 209.8377],
        [167.2536, 193.4805, 190.5445, 212.0146],
        [350.9119, 186.8939, 374.5399, 221.9578],
        [119.9063, 104.4992, 155.9364, 148.1499],
        [203.9761, 191.2700, 231.2309, 209.5346],
        [309.9510, 179.3568, 355.0557, 205.8382],
        [189.1120, 196.9571, 204.9933, 211.5256],
        [214.2793, 192.9922, 229.5054, 208.0728],
        [203.0917, 192.0317, 218.8095, 209.9182],
        [326.1642, 188.8716, 357.5744, 206.0282],
        [213.3864, 192.2182, 242.6201, 210.0946],
        [266.0551, 182.1116, 290.0341, 212.0143],
        [196.8726, 198.0351, 214.3091, 210.5594],
        [305.1704, 176.8456, 350.8604, 205.8191],
        [354.4117, 186.3788, 375.5167, 202.8780],
        [285.9145, 180.7034, 335.8421, 208.9566],
        [179.1970, 196.3996, 1

In [16]:
# We can use `Visualizer` to draw the predictions on the image.
if FIRST_TIME:
    v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1], 5000)

# Train on a custom dataset

In this section, we show how to train an existing detectron2 model on a custom dataset in a new format.

We use [the balloon segmentation dataset](https://github.com/matterport/Mask_RCNN/tree/master/samples/balloon)
which only has one class: balloon.
We'll train a balloon segmentation model from an existing model pre-trained on COCO dataset, available in detectron2's model zoo.

Note that COCO dataset does not have the "balloon" category. We'll be able to recognize this new class in a few minutes.

## Prepare the dataset

In [17]:
# download, decompress the data
if FIRST_TIME:
    !wget https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
    !unzip balloon_dataset.zip > /dev/null

Register the balloon dataset to detectron2, following the [detectron2 custom dataset tutorial](https://detectron2.readthedocs.io/tutorials/datasets.html).
Here, the dataset is in its custom format, therefore we write a function to parse it and prepare it into detectron2's standard format. User should write such a function when using a dataset in custom format. See the tutorial for more details.


To verify the dataset is in correct format, let's visualize the annotations of randomly selected samples in the training set:



In [18]:
# dataset_dicts = get_wastebin_dicts(TRAIN_PATH)
if FIRST_TIME:
    dataset_dicts = DatasetCatalog.get("dataset_train_reduced")
    metadata = MetadataCatalog.get("dataset_train_reduced")
    for d in random.sample(dataset_dicts, 10):
        img = cv2.imread(d["file_name"])
        
        visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.5)
        out = visualizer.draw_dataset_dict(d)
        cv2_imshow(out.get_image()[:, :, ::-1], 3000)

In [19]:
# dataset_dicts2 = DatasetCatalog.get("dataset_train_reduced2")
# print(len(dataset_dicts2))

## Train!

Now, let's fine-tune a COCO-pretrained R50-FPN Mask R-CNN model on the balloon dataset. It takes ~2 minutes to train 300 iterations on a P100 GPU.


In [20]:
cfg1 = get_cfg()
cfg1.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
print(cfg1)

CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 4
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ('coco_2017_val',)
  TRAIN: ('coco_2017_train',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: ['res2', 

In [21]:
from detectron2.engine import DefaultTrainer
from EarlyStopping import EarlyStopping
from detectron2.evaluation import COCOEvaluator

# cfg = get_cfg()
# trainer = DefaultTrainer(cfg)
# trainer.register_callback(EarlyStopping(cfg, trainer.model, patience=3, eval_period=1))
# trainer.resume_or_load(resume=False)
# trainer.train()



cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
# cfg.DATASETS.TRAIN = ("dataset_train_snowy", "dataset_train_snowy2")
# cfg.DATASETS.TRAIN = ("dataset_train_snowy_clean", "dataset_train_snowy2_clean", "dataset_train_reduced",) # ("dataset_train_original", "dataset_train_generated_cleaned")
# cfg.DATASETS.TRAIN = ("snowy_day_train", )
# registered_generated_data_ad.append("dataset_train_reduced")
# cfg.DATASETS.TRAIN = ("dataset_train_reduced", )
cfg.DATASETS.TRAIN = tuple(registered_generated_data_ad)
# cfg.DATASETS.VAL = ("snowy_day_val", )
cfg.DATASETS.VAL = ("dataset_val", )
# cfg.DATASETS.VAL = ("dataset_val", ) # , "dataset_train_original", "dataset_train_generated", "dataset_train_generated_cleaned")
cfg.DATASETS.TEST = () # "dataset_test",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 4  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 30000    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # TODO: set to 1 again!!!  # only has one class. (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.
# cfg.EVAL_PERIOD = 1000

# log every step instead of every 20 steps
cfg.SOLVER.LOG_PERIOD = 1

# also train on images without labels


# store each run in a new directory for better logging
out_dir = cfg.OUTPUT_DIR
existing_dirs = os.listdir(out_dir)
cfg.OUTPUT_DIR = os.path.join(out_dir, "run_" + str(len(existing_dirs)))
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# create a notes.txt file to store notes about the run, such as the used training data
notes = {}
notes["train_data"] = cfg.DATASETS.TRAIN
notes["val_data"] = cfg.DATASETS.VAL

with open(os.path.join(cfg.OUTPUT_DIR, "notes.txt"), "w") as f:
    json.dump(notes, f)

val_evaluator = COCOEvaluator(cfg.DATASETS.VAL[0], cfg, True, output_dir="./output/")
trainer = DefaultTrainer(cfg) 
trainer.register_hooks([EarlyStopping(cfg, trainer.model, val_evaluator, patience=5, eval_period=100)]) # 500
trainer.resume_or_load(resume=False)
try:    
    trainer.train()
except:
    print("Training stopped early")

[32m[06/09 04:55:08 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls_s

[32m[06/09 04:55:08 d2.engine.train_loop]: [0mStarting training from iteration 0
after_step at step 0
starting evaluation!!!
[32m[06/09 04:55:09 d2.evaluation.evaluator]: [0mStart inference on 89 batches
[32m[06/09 04:55:15 d2.evaluation.evaluator]: [0mInference done 11/89. Dataloading: 0.0018 s/iter. Inference: 0.4484 s/iter. Eval: 0.0010 s/iter. Total: 0.4513 s/iter. ETA=0:00:35
[32m[06/09 04:55:20 d2.evaluation.evaluator]: [0mInference done 22/89. Dataloading: 0.0025 s/iter. Inference: 0.4529 s/iter. Eval: 0.0010 s/iter. Total: 0.4565 s/iter. ETA=0:00:30
[32m[06/09 04:55:25 d2.evaluation.evaluator]: [0mInference done 34/89. Dataloading: 0.0026 s/iter. Inference: 0.4488 s/iter. Eval: 0.0010 s/iter. Total: 0.4524 s/iter. ETA=0:00:24
[32m[06/09 04:55:30 d2.evaluation.evaluator]: [0mInference done 45/89. Dataloading: 0.0026 s/iter. Inference: 0.4494 s/iter. Eval: 0.0010 s/iter. Total: 0.4530 s/iter. ETA=0:00:19
[32m[06/09 04:55:35 d2.evaluation.evaluator]: [0mInference don

In [22]:
# Look at training curves in tensorboard:
%load_ext tensorboard
%tensorboard --logdir output

## Inference & evaluation using the trained model
Now, let's run inference with the trained model on the balloon validation dataset. First, let's create a predictor using the model we just trained:



In [23]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
# cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_best.pth")  # path to the model we just trained
# cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.007 # set a custom testing threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set a custom testing threshold
predictor = DefaultPredictor(cfg)

[32m[06/09 06:03:45 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./output/run_182/model_best.pth ...


Then, we randomly select several samples to visualize the prediction results.

In [24]:
from detectron2.utils.visualizer import ColorMode

dataset_dicts = DatasetCatalog.get("dataset_val")
metadata = MetadataCatalog.get("dataset_val")
counter = 0
for d in dataset_dicts:
    if counter > 2:
        break
# for d in random.sample(dataset_dicts, 30):    
    if len(d["annotations"]) == 0:
        continue

    counter += 1
    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(im[:, :, ::-1],
                   metadata=metadata, 
                   scale=0.5#, 
                #    instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    # draw the sample with the correct label and the predicted label
    correct_out = v.draw_dataset_dict(d)
    cv2_imshow(correct_out.get_image()[:, :, ::-1], 4000)

    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1], 10000)

[32m[06/09 06:03:46 d2.data.datasets.coco]: [0mLoaded 3438 images in COCO format from ../data/Waste_Bin_Detection_Dataset/cloudy_2021_04_09_16_02_cam5_filtered (validation and test dataset images and ground truths)/validation_set_static_gt.json


In [25]:
# dataset_to_evaluate = ["dataset_val", "dataset_train_original", "dataset_train_generated", "dataset_train_generated_cleaned"] # "dataset_val", 
# for dataset in dataset_to_evaluate:
#     # print out which classes are in the dataset
#     classes = MetadataCatalog.get(dataset)#.thing_classes
#     print("Classes in {}: {}".format(dataset, classes))

We can also evaluate its performance using AP metric implemented in COCO API.
This gives an AP of ~70. Not bad!

In [26]:
# import pickle
# with open(os.path.join(cfg.OUTPUT_DIR, "inferences.pkl"), "wb") as f:
#         pickle.dump(inferences, f)

In [27]:
# # load the inferences
# with open(os.path.join(cfg.OUTPUT_DIR, "inferences.pkl"), "rb") as f:
#     inferences_re = pickle.load(f)
# print(inferences_re)

In [28]:
inferences = {}

In [29]:
import pickle
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# dataset_to_evaluate = ["dataset_train_original", "dataset_train_generated", "dataset_train_generated_cleaned", "dataset_val", "dataset_snowy_day", "dataset_snowy_night", "dataset_argo"] # "dataset_val", 
# dataset_to_evaluate = ["dataset_val", "dataset_train_generated_cleaned"] # "dataset_val", 
dataset_to_evaluate = ["dataset_test", "dataset_snowy_day", "dataset_snowy_night", "dataset_rb", "dataset_train_reduced"] # , "dataset_val"] # ["dataset_val", "dataset_snowy_day"] # , "dataset_snowy_night3", "dataset_argo3"]
# dataset_to_evaluate = ["dataset_train_snowy_prev2", "dataset_train_snowy2_prev2", "dataset_train_snowy_clean_prev2", "dataset_train_snowy2_clean_prev2"]
# dataset_to_evaluate = ["dataset_train_reduced"]
# dataset_to_evaluate = ["sunny_test", "cloudy_val_test", "snowy_day_test", "snowy_night_test", "rb_test"]
# inferences = {}
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.000001
for dataset in dataset_to_evaluate:
    evaluator = COCOEvaluator(dataset, output_dir="./output/" + dataset)
    val_loader = build_detection_test_loader(cfg, dataset)
    inferences[dataset] = inference_on_dataset(predictor.model, val_loader, evaluator) # predictor.model
    print(inferences[dataset])
    # save all inferences in a file
    with open(os.path.join(cfg.OUTPUT_DIR, "inferences.pkl"), "wb") as f:
        pickle.dump(inferences, f)

all_inferences[cfg.DATASETS.TRAIN] = inferences

    # print(inference_on_dataset(predictor.model, val_loader, evaluator))
# another equivalent way to evaluate the model is to use `trainer.test`

# Trained on original dataset + generated (cleaned) dataset
# |   AP   |  AP50  |  AP75  |  APs   |  APm   |  APl   |
# |:------:|:------:|:------:|:------:|:------:|:------:|
# | 22.630 | 34.140 | 27.150 | 9.977  | 36.584 | 54.122 |  dataset_train_original (~7000 images, ~1000 labels)
# | 44.636 | 73.037 | 47.180 | 28.148 | 49.000 | 49.672 |  dataset_train_generated (~60 images, 60 labels)
# | 48.803 | 78.352 | 53.138 | 30.783 | 52.991 | 55.455 |  dataset_train_generated_cleaned 
# | 23.292 | 35.682 | 26.742 | 11.776 | 33.034 | 47.688 |  dataset_val
# Trained on original dataset
# |   AP   |  AP50  |  AP75  |  APs   |  APm   |  APl   |
# | 22.014 | 34.676 | 26.207 | 10.798 | 36.681 | 54.749 | dataset_train_original
# | 43.123 | 73.737 | 44.663 | 38.779 | 44.977 | 48.450 | dataset_train_generated
# | 47.539 | 79.497 | 51.194 | 44.103 | 49.014 | 53.176 | dataset_train_generated_cleaned
# | 21.913 | 34.243 | 27.446 | 11.634 | 32.517 | 48.396 | dataset_val

# Learnings: Quite some variation between runs
# New snowy training not improving results
# Plan: clean up snowy data 2
# How exactly try out difference between original and generated data?
# Take only one img of each original sequence used for generation?
# apply my method on the original data, analyze results


# Trained on reduced original dataset
# |   AP   |  AP50  |  AP75  |  APs   |  APm   |  APl   |
# | 45.756 | 67.222 | 51.345 | 25.949 | 55.818 | 78.282 | dataset_val_original_reduced
# | 21.538 | 31.880 | 25.038 | 9.948 | 26.263 | 50.461 | dataset_snowy_day 
# Trained on snowy train set
# | 18.280 | 29.895 | 20.292 | 9.300 | 27.073 | 38.439 |
# | 9.994  | 16.164 | 12.322 | 6.468 | 16.309 | 18.593 |
# Combined unique train and snowy train set
# | 38.833 | 58.102 | 45.095 | 19.591 | 47.430 | 75.208 |
# | 21.495 | 32.802 | 25.634 | 9.489 | 29.433 | 49.897 |
# Combined unique train and snowy train set (cleaned)
# | 37.469 | 53.721 | 42.719 | 18.028 | 45.765 | 78.251 |
# | 24.375 | 36.477 | 30.229 | 12.737 | 30.198 | 53.065 |
# Trained on automatically reduced (to max 3) snowy train set
# | 38.031 | 55.635 | 43.982 | 19.117 | 46.702 | 77.870 |
# | 22.047 | 33.283 | 25.873 | 11.772 | 30.238 | 51.071 |
# Trained on automatically reduced (to max 1) snowy train set
# | 36.117 | 51.607 | 40.976 | 18.035 | 43.181 | 73.940 |
# | 22.901 | 34.549 | 26.880 | 13.523 | 25.643 | 51.252 |
# Trained on automatically reduced (to max 1) snowy train set
# | 37.685 | 52.681 | 43.755 | 19.267 | 47.180 | 68.820 |
# | 24.363 | 35.289 | 30.541 | 14.746 | 29.810 | 47.583 |
# Trained on automatically reduced (to max 1) snowy2 train set (cleaned)
# | 36.995 | 51.467 | 43.097 | 19.577 | 44.768 | 70.279 |
# | 22.009 | 31.875 | 27.657 | 12.179 | 27.253 | 46.786 |
# Trained on automatically reduced (to max 1) origin + snowy 1+2 train set
# | 37.177 | 53.430 | 42.364 | 20.412 | 43.247 | 75.661 |
# | 22.720 | 34.215 | 27.544 | 13.802 | 26.913 | 46.457 |
# | 35.735 | 51.473 | 41.615 | 15.925 | 44.434 | 75.166 | dataset_test_original_reduced


# Trained on reduced original dataset
# |   AP   |  AP50  |  AP75  |  APs   |  APm   |  APl   |
# | 42.227 | 58.580 | 51.519 | 20.045 | 49.418 | 77.425 | dataset_test_original_reduced
# | 23.288 | 35.369 | 27.442 | 12.593 | 30.684 | 50.503 | dataset_snowy_day
# | 30.954 | 42.883 | 39.340 | 6.343  | 33.531 | 49.271 | dataset_snowy_night
# | 26.906 | 37.472 | 34.993 | 10.497 | 28.353 | 67.791 | dataset_rb
# Trained on reduced original ds + snowy train set
# | 40.526 | 55.548 | 47.742 | 18.466 | 49.051 | 72.051 | dataset_test_original_reduced
# | 22.646 | 32.486 | 29.322 | 14.175 | 28.426 | 45.474 | dataset_snowy_day
# | 32.458 | 44.207 | 39.656 | 9.361  | 37.541 | 42.727 | dataset_snowy_night
# | 30.169 | 43.084 | 40.356 | 11.836 | 30.764 | 70.756 | dataset_rb
# Trained on snowy train set
# | 27.400 | 43.050 | 34.106 | 12.741 | 32.122 | 55.029 | dataset_test_original_reduced
# | 13.037 | 20.674 | 16.070 | 8.540  | 17.123 | 28.456 | dataset_snowy_day
# | 16.335 | 25.245 | 22.863 | 9.003  | 19.103 | 22.028 | dataset_snowy_night
# | 19.437 | 32.808 | 26.297 | 9.343  | 24.582 | 49.679 | dataset_rb
# Trained on reduced original ds+snowy11!!!_clean train set (110)
## | 40.629 | 55.369 | 48.316 | 20.481 | 47.228 | 71.452 | dataset_test_original_reduced
## | 23.651 | 34.864 | 29.611 | 14.119 | 28.572 | 47.609 | dataset_snowy_day
## | 32.591 | 44.266 | 39.507 | 10.260 | 37.368 | 40.815 | dataset_snowy_night
## | 28.068 | 38.899 | 36.524 | 11.019 | 26.866 | 71.075 | dataset_rb
# Trained on reduced original ds+snowy12_clean train set
# | 40.249 | 55.174 | 46.785 | 18.387 | 45.818 | 75.338 | dataset_test_original_reduced
# | 22.504 | 33.711 | 27.502 | 12.444 | 26.322 | 50.499 | dataset_snowy_day
# | 30.741 | 42.843 | 37.571 | 8.163  | 36.492 | 40.848 | dataset_snowy_night
# | 28.301 | 39.012 | 35.747 | 10.758 | 26.531 | 72.088 | dataset_rb

# Trained on reduced original ds+snowy2_clean train set, validated on base+snowy1
# | 40.870 | 55.616 | 46.718 | 18.025 | 48.349 | 75.099 |
# | 24.301 | 36.334 | 32.269 | 14.011 | 29.479 | 50.280 |
# | 33.000 | 44.968 | 38.284 | 9.169  | 37.113 | 44.403 |
# | 31.123 | 41.978 | 38.914 | 11.120 | 32.393 | 74.179 |

# Trained on reduced original ds+snowy2_clean train set, validated on base val (again for test purposes) (run126)
# | 41.216 | 56.311 | 48.854 | 19.793 | 48.123 | 74.205 |
# | 23.317 | 34.646 | 27.433 | 13.878 | 27.546 | 50.096 |
# | 32.696 | 44.561 | 38.976 | 5.875  | 39.534 | 42.136 |
# | 31.938 | 44.228 | 41.942 | 15.081 | 31.874 | 71.841 |
# Trained on reduced original ds+snowy2_clean train set, validated on base val (again for test purposes) (run129), corrected EarlyStop for +1
# | 41.586 | 57.958 | 49.488 | 19.917 | 49.430 | 75.520 |
# | 20.348 | 31.701 | 22.793 | 11.257 | 27.991 | 49.673 |
# | 29.590 | 42.131 | 37.192 | 4.390  | 33.983 | 44.437 |
# | 28.617 | 41.018 | 39.489 | 14.029 | 29.738 | 68.481 |

# Trained on reduced original ds+snowy2 train set 5 shared_feats, validated on base val
# | 40.940 | 55.415 | 47.934 | 19.880 | 47.156 | 74.997 |
# | 24.242 | 35.449 | 29.178 | 13.602 | 28.188 | 51.382 |
# | 31.979 | 44.839 | 39.228 | 8.137 | 35.326 | 47.263 |
# | 27.096 | 37.766 | 34.528 | 10.253 | 25.181 | 72.292 |
# | 41.531 | 57.572 | 47.809 | 22.123 | 46.618 | 72.393 | validation set
# | 50.841 | 75.138 | 59.625 | 33.334 | 57.911 | 76.433 | validation set at end of training
# Trained on snowy2 train set 5shared_feats, validated on base val
# | 9.003 | 14.244 | 10.563 | 4.673 | 14.189 | 10.277 |
# | 4.896 | 7.581  | 6.363  | 5.266 | 6.202 | 3.402 |
# | 5.324 | 7.788  | 7.329  | 9.433 | 6.142 | 3.748 |
# | 6.676 | 9.884  | 8.396  | 3.642 | 9.981 | 21.080 |
# | 9.684 | 14.752 | 11.823 | 8.190 | 12.894 | 8.984 |

# Trained on reduced original ds+snowy2 train set 3 shared_feats, validated on base val
# | 41.350 | 56.059 | 48.035 | 19.497 | 48.979 | 73.283 |
# | 23.036 | 34.242 | 29.728 | 13.483 | 27.873 | 48.104 |
# | 30.043 | 40.968 | 35.327 | 7.369 | 33.548 | 43.896 |
# | 29.247 | 38.818 | 36.348 | 10.579 | 29.475 | 73.652 |

# Trained on reduced original ds+snowy2 clean train set 5 shared_feats, validated on base val
# | 39.231 | 53.806 | 45.978 | 18.716 | 45.395 | 76.120 |
# | 22.942 | 33.397 | 26.931 | 12.528 | 28.458 | 50.749 |
# | 28.345 | 39.803 | 34.844 | 4.790 | 31.903 | 45.840 |
# | 26.162 | 36.147 | 34.613 | 13.855 | 25.274 | 72.274 |

# Trained on Cloudy TS, validated on base val
# | 73.398 | 90.075 | 86.368 | 59.004 | 79.380 | 92.290 |
# | 27.903 | 40.912 | 33.097 | 15.120 | 36.454 | 56.199 |
# | 36.365 | 49.972 | 45.951 | 12.264 | 36.646 | 59.991 |
# | 36.947 | 51.098 | 48.613 | 16.844 | 39.489 | 71.109 |

# Trained on Snowy day test set, validated on base val
# | 36.795 | 50.766 | 44.389 | 18.234 | 44.289 | 66.710 |
# | 46.484 | 60.183 | 56.992 | 34.869 | 53.521 | 76.481 |
# | 38.418 | 54.010 | 43.548 | 14.544 | 37.980 | 59.682 |
# | 26.050 | 36.007 | 33.870 | 15.842 | 27.827 | 50.114 |

# Trained on Snowy night test set, validated on base val
# | 26.042 | 36.250 | 31.881 | 8.690 | 30.962 | 58.396 |
# | 17.130 | 25.504 | 22.580 | 10.059 | 24.466 | 35.319 |
# | 64.369 | 79.325 | 77.266 | 53.482 | 58.087 | 88.358 |
# | 7.783 | 11.361 | 9.423  | 3.953 | 11.219 | 22.678 |

# Trained on RB Test set, validated on base val
# | 30.353 | 43.750 | 37.713 | 12.837 | 41.747 | 58.280 |
# | 15.401 | 22.403 | 19.459 | 10.184 | 20.932 | 32.432 |
# | 25.105 | 35.440 | 33.512 | 13.819 | 30.068 | 30.163 |
# | 51.459 | 65.659 | 61.971 | 35.790 | 58.098 | 86.360 |

# Trained on Cloudy TS, validated on base val, only 2000 iterations
# | 60.644 | 79.082 | 72.994 | 42.927 | 69.393 | 86.044 |
# | 27.339 | 40.496 | 31.221 | 16.247 | 35.326 | 52.667 |
# | 35.940 | 49.729 | 45.018 | 16.105 | 38.856 | 58.505 |
# | 30.525 | 44.472 | 39.534 | 16.170 | 33.523 | 73.552 |
# | 32.991 | 45.668 | 38.869 | 12.542 | 40.493 | 70.720 | # sunny train set

# Trained on Sunny DS, validated on base val, run_151
# | 38.445 | 51.501 | 45.589 | 17.469 | 45.061 | 71.953 |
# | 22.620 | 33.039 | 28.874 | 12.032 | 27.091 | 46.991 |
# | 29.439 | 40.622 | 35.526 | 6.896 | 34.094 | 39.532 |
# | 23.700 | 32.438 | 29.837 | 10.063 | 19.758 | 68.526 |
# | 47.135 | 60.447 | 55.340 | 21.158 | 52.799 | 77.901 |
# |   AP   |  AP50  |  AP75  |  APs  |  APm   |  APl  |
# |:------:|:------:|:------:|:-----:|:------:|:-----:|
# | 14.830 | 29.550 | 8.663  | 0.644 | 37.907 | 0.000 | Snowy1
# | 21.744 | 43.705 | 23.492 | 6.561 | 35.805 | 80.000 | Snowy2
# | 55.050 | 100.000 | 25.248 | 70.000 | 52.244 |  nan  |
# | 44.797 | 83.168 | 37.030 | 32.673 | 72.525 |  nan  | Snowy2_clean
# | 22.075 | 49.934 | 11.069 | 15.651 | 28.186 | 52.525 | Snowy1_prev
# | 38.256 | 70.252 | 39.190 | 16.171 | 32.623 | 66.186 | Snowy2_prev
# | 36.313 | 66.038 | 25.129 | 18.614 | 47.057 | 70.000 | Snowy1_clean_prev
# | 32.216 | 65.966 | 25.493 | 17.592 | 50.402 | 55.149 | Snowy2_clean_prev

# Trained on Sunny DS for 3700 steps, validated on base val, run_161
# | 40.764 | 56.288 | 48.817 | 18.795 | 48.413 | 77.876 |
# | 20.906 | 31.741 | 25.021 | 9.712 | 29.664 | 50.245 |
# | 29.396 | 41.675 | 35.754 | 6.444 | 31.602 | 46.891 |
# | 30.215 | 40.978 | 39.734 | 14.280 | 30.306 | 69.979 |
# | 67.056 | 78.741 | 74.886 | 54.285 | 64.909 | 95.520 | # sunny train set

# Trained on sunny + gen_ad, validated on cloudy_val, run_182
# | 42.548 | 57.815 | 51.854 | 21.842 | 50.296 | 75.707 |
# | 23.600 | 35.603 | 28.463 | 13.760 | 29.930 | 49.543 |
# | 27.776 | 38.116 | 34.173 | 5.264 | 30.981 | 42.980 |
# | 30.402 | 42.728 | 40.584 | 12.693 | 33.506 | 68.037 |
# | 59.024 | 73.689 | 68.021 | 38.365 | 62.845 | 91.762 | # sunny train set



[32m[06/09 06:04:35 d2.data.datasets.coco]: [0mLoaded 3437 images in COCO format from ../data/Waste_Bin_Detection_Dataset/cloudy_2021_04_09_16_02_cam5_filtered (validation and test dataset images and ground truths)/test_set_static_gt.json
[32m[06/09 06:04:35 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[06/09 06:04:35 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common.NumpySerializedList'>
[32m[06/09 06:04:35 d2.data.common]: [0mSerializing 3437 elements to byte tensors and concatenating them all ...
[32m[06/09 06:04:35 d2.data.common]: [0mSerialized dataset takes 1.01 MiB
[32m[06/09 06:04:35 d2.evaluation.evaluator]: [0mStart inference on 3437 batches
[32m[06/09 06:04:36 d2.evaluation.evaluator]: [0mInference done 11/3437. Dataloading: 0.0007 s/iter. Inference: 0.0987 s/iter. Eval: 0.0001 s/iter. Total: 0.0995 s/iter.

In [30]:
# Sunny 60/20/20 split
# | 17.051 | 26.048 | 17.827 | 7.947 | 18.290 | 65.991 |
# | 26.614 | 38.855 | 29.993 | 11.276 | 31.853 | 64.150 |
# | 37.532 | 54.219 | 46.709 | 36.368 | 37.212 | 48.022 |
# | 0.000 | 0.000  | 0.000  |  nan  | 0.000 |  nan  |
# | 52.614 | 65.545 | 65.545 |  nan  | 35.000 | 90.000 |



# Snowy Day (50/25/25)
# | 0.000 | 0.000  | 0.000  | 0.000 | 0.000 | 0.000 |


In [31]:
dataset_to_evaluate = ["dataset_train_original", "dataset_train_generated", "dataset_train_generated_cleaned", "dataset_val"] # "dataset_val", 
max_dataset_len = max([len(dataset) for dataset in dataset_to_evaluate])
print(f"Trained on {cfg.DATASETS.TRAIN} for {cfg.SOLVER.MAX_ITER } iterations")
print(" " * (max_dataset_len + 1) + "AP\tAP50\tAP75\tAPs\tAPm\tAPl")

src = (cfg.DATASETS.TRAIN[0], )
new_dataset = cfg.DATASETS.TRAIN
# print("All_inferences", all_inferences)
inferences = all_inferences[new_dataset]
inferences_diff = []
source_inferences = all_inferences[src]
for inf, source_inf in zip(inferences, source_inferences):
    # print(inf.keys())
    # print("src", source_inf.keys())
    # get the difference between the source and the current dataset inferences lists
    # inferences_diff.append([inferences[i] - source_inferences[i] for i in range(len(inferences))])
    inferences_diff.append({'bbox': 
        {k: inf['bbox'][k] - source_inf['bbox'][k] for k in inf['bbox']}})

def print_result(datasets, inferences):
    for dataset, inference in zip(datasets, inferences):
        results = ""
        # print("inf", inference)
        for value in inference["bbox"].values():
            results += "\t" + str(round(value, 3))
        # fill the dataset name with spaces to make the output look nice
        dataset = dataset + " " * (max_dataset_len - len(dataset))
        print(f"{dataset}{results}")

# print("iinf", inferences)
print_result(dataset_to_evaluate, inferences)
print("")
print(f"Diff between {src} and {new_dataset}")
print_result(dataset_to_evaluate, inferences_diff)

# for dataset, inference in zip(dataset_to_evaluate, inferences):
#     results = ""
#     for value in inference["bbox"].values():
#         results += "\t" + str(round(value, 3))
#     # fill the dataset name with spaces to make the output look nice
#     dataset = dataset + " " * (max_dataset_len - len(dataset))
#     print(f"{dataset}{results}")

# Trained on ('dataset_train_original',) for 3000 iterations
#                                   AP	    AP50	AP75	APs	    APm	    APl
# dataset_train_original         	47.712	71.238	54.205	33.646	59.207	75.995
# dataset_train_generated        	50.498	83.097	56.722	38.37	53.308	59.683
# dataset_train_generated_cleaned	55.923	89.011	64.326	41.508	58.227	70.957
# dataset_val                    	41.819	59.327	50.96	23.14	50.126	75.897

# Trained on ('dataset_train_reduced',) for 3000 iterations
#                                   AP	    AP50	AP75	APs	    APm	    APl
# dataset_train_original         	43.166	62.304	49.716	27.37	55.352	78.294
# dataset_train_generated        	51.598	84.813	55.95	41.168	52.868	61.18
# dataset_train_generated_cleaned	57.243	90.986	64.276	46.277	57.732	72.756
# dataset_val                    	36.084	52.41	41.594	16.307	44.483	77.121

Trained on ('2021_02_18_06_28', '2021_03_25_14_04', '2021_04_05_14_35', '2021_05_18_14_02', '2021_06_05_12_08', '2021_07_07_06_41', '2021_08_09_06_30', '2021_09_15_06_28', '2021_10_15_18_16', '2021_11_02_12_59', '2021_12_15_12_54', '2022_01_21_14_04', 'dataset_train_reduced') for 30000 iterations
                                AP	AP50	AP75	APs	APm	APl


KeyError: ('2021_02_18_06_28',)

In [None]:
# Base dataset:
# Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.222
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.346
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.256
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.097
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.328
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.445
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.271
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.364
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.365
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.193
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.491
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.572
# [02/07 21:20:55 d2.evaluation.coco_evaluation]: Evaluation results for bbox: 
# |   AP   |  AP50  |  AP75  |  APs  |  APm   |  APl   |
# |:------:|:------:|:------:|:-----:|:------:|:------:|
# | 22.216 | 34.607 | 25.624 | 9.704 | 32.764 | 44.500 |

# OrderedDict([('bbox', {'AP': 22.216189299980034, 'AP50': 34.60726221662003, 'AP75': 25.623723833208423, 'APs': 9.704298064507062, 'APm': 32.76426802413977, 'APl': 44.499573814016266})])

# Only Generated:
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.207
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.308
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.249
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.064
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.325
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.482
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.257
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.334
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.334
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.141
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.477
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.557
# [02/07 22:06:46 d2.evaluation.coco_evaluation]: Evaluation results for bbox: 
# |   AP   |  AP50  |  AP75  |  APs  |  APm   |  APl   |
# |:------:|:------:|:------:|:-----:|:------:|:------:|
# | 20.737 | 30.766 | 24.910 | 6.382 | 32.485 | 48.188 |

# Combined:
# Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.224
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.362
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.241
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.106
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.346
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.459
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.280
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.394
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.395
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.223
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.535
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.556
# [02/07 22:35:46 d2.evaluation.coco_evaluation]: Evaluation results for bbox: 
# |   AP   |  AP50  |  AP75  |  APs   |  APm   |  APl   |
# |:------:|:------:|:------:|:------:|:------:|:------:|
# | 22.430 | 36.236 | 24.099 | 10.622 | 34.568 | 45.883 |
# cleaned:
# | 23.292 | 35.682 | 26.742 | 11.776 | 33.034 | 47.688 |

# Cleaned combined:
# Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.233
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.357
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.267
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.118
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.330
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.477
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.283
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.385
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.386
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.237
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.492
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.579
# [02/14 22:21:44 d2.evaluation.coco_evaluation]: Evaluation results for bbox: 
# |   AP   |  AP50  |  AP75  |  APs   |  APm   |  APl   |
# |:------:|:------:|:------:|:------:|:------:|:------:|
# | 23.292 | 35.682 | 26.742 | 11.776 | 33.034 | 47.688 |

# Only Generated cleaned:
# Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.207
#  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.323
#  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.244
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.068
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.323
#  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.508
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.263
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.363
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.363
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.179
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.502
#  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.569
# [02/14 22:10:04 d2.evaluation.coco_evaluation]: Evaluation results for bbox: 
# |   AP   |  AP50  |  AP75  |  APs  |  APm   |  APl   |
# |:------:|:------:|:------:|:-----:|:------:|:------:|
# | 20.746 | 32.317 | 24.438 | 6.771 | 32.274 | 50.778 |

# Base         | 22.216 | 34.607 | 25.624 | 9.704  | 32.764 | 44.500 |
# Combined     | 22.430 | 36.236 | 24.099 | 10.622 | 34.568 | 45.883 |
# Combcleaned  | 23.292 | 35.682 | 26.742 | 11.776 | 33.034 | 47.688 |
# GeneratedOnly| 20.737 | 30.766 | 24.910 | 6.382  | 32.485 | 48.188 |
# CleanedOnly  | 20.746 | 32.317 | 24.438 | 6.771  | 32.274 | 50.778 |

# Base on pure validation set:
#               | 23.440 | 34.936 | 27.234 | 12.098 | 32.949 | 50.946 |


In [None]:
import numpy as np
import matplotlib.pyplot as plt 
from pycocotools import cocoeval
# %load_ext autoreload
# %autoreload 2
from pycocotools.coco import COCO
# import importlib
# importlib.reload(COCO)

def plot_pr_curve(dataset_gt, dataset_dt):
    gt_coco = COCO(dataset_gt)

    # dt_coco = COCO("./output/" + dataset + "/coco_instances_results.json")

    # with open("./output/" + dataset + "/coco_instances_results.json", 'r') as f:
    #     dataset = json.load(f)
    # print(dataset)

    dt_coco = gt_coco.loadRes("./output/" + dataset_dt + "/coco_instances_results.json")
    coco_eval = cocoeval.COCOeval(gt_coco, dt_coco, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    # print(coco_eval.eval)
    all_precision = coco_eval.eval['precision']

    pr_5 = all_precision[0, :, 0, 0, 2] # data for IoU@0.5
    pr_7 = all_precision[4, :, 0, 0, 2] # data for IoU@0.7
    pr_9 = all_precision[8, :, 0, 0, 2] # data for IoU@0.9

    x = np.arange(0, 1.01, 0.01)
    plt.title('Precision-Recall Curve: ' + dataset_dt)
    # add a subtitle
    plt.suptitle(f'Trained on {cfg.DATASETS.TRAIN}', fontsize=10)
    plt.plot(x, pr_5, label='IoU@0.5')
    plt.plot(x, pr_7, label='IoU@0.7')
    plt.plot(x, pr_9, label='IoU@0.9')
    # show the legend
    plt.legend()
    # label the axes
    plt.xlabel('Recall')
    plt.ylabel('Precision')

    # save the plot 
    filename = dataset_dt + '_trained_on_' + str(cfg.DATASETS.TRAIN) + '.png'
    save_dir = "output/pr-curves/"
    # create the dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    file_path = save_dir + filename
    plt.savefig(file_path, dpi=100)
    print("Saved at " + file_path)

    plt.show()

In [None]:
dataset_to_evaluate = [(VAL_PATH + VAL_NAME, "dataset_val"),
                    (TRAIN_PATH_ORIGINAL + TRAIN_NAME_ORIGINAL, "dataset_train_original"), 
                    (TRAIN_PATH_GENERATED + TRAIN_NAME_CLEANED, "dataset_train_generated_cleaned")]
for dataset in dataset_to_evaluate:
    plot_pr_curve(dataset[0], dataset[1])

# Other types of builtin models

We showcase simple demos of other types of models below:

In [None]:
# Inference with a keypoint detection model
# cfg = get_cfg()   # get a fresh new config
# cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set threshold for this model
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
# predictor = DefaultPredictor(cfg)
# outputs = predictor(im)
# v = Visualizer(im[:,:,::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
# out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
# Inference with a panoptic segmentation model
# cfg = get_cfg()
# cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml"))
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml")
# predictor = DefaultPredictor(cfg)
# panoptic_seg, segments_info = predictor(im)["panoptic_seg"]
# v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
# out = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"), segments_info)
# cv2_imshow(out.get_image()[:, :, ::-1])

# Run panoptic segmentation on a video

In [None]:
# # This is the video we're going to process
# from IPython.display import YouTubeVideo, display
# video = YouTubeVideo("ll8TgCZ0plk", width=500)
# display(video)

In [None]:
# Install dependencies, download the video, and crop 5 seconds for processing
# !pip install youtube-dl
# !youtube-dl https://www.youtube.com/watch?v=ll8TgCZ0plk -f 22 -o video.mp4
# !ffmpeg -i video.mp4 -t 00:00:06 -c:v copy video-clip.mp4

In [None]:
# Run frame-by-frame inference demo on this video (takes 3-4 minutes) with the "demo.py" tool we provided in the repo.
# !git clone https://github.com/facebookresearch/detectron2
# # Note: this is currently BROKEN due to missing codec. See https://github.com/facebookresearch/detectron2/issues/2901 for workaround.
# %run detectron2/demo/demo.py --config-file detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml --video-input video-clip.mp4 --confidence-threshold 0.6 --output video-output.mkv \
#   --opts MODEL.WEIGHTS detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl

In [None]:
# Download the results
# from google.colab import files
# files.download('video-output.mkv')