In [None]:
from google.colab import drive

# Mount Google Drive; the dataset and the saved model used by later cells
# live under this mount point.
drive_mount_point = "/content/drive"
drive.mount(drive_mount_point)

In [None]:
%python -m pip install pyyaml
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
%git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
%python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [3]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
torch:  2.4 ; cuda:  cu121
detectron2: 0.6


In [4]:
# Basic setup.
# Set up the detectron2 logger first, before the other detectron2 imports and
# before any training/evaluation code runs.
# NOTE(review): presumably this is what routes the "[... d2.*]" log lines into
# the cell outputs below -- confirm against the detectron2 logger docs.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [5]:
# Root folder of the dataset on the mounted Drive. The trailing slash is
# required: annotation file names and the "images/" folder are appended to it
# with plain string concatenation when the splits are registered.
data_dir_path = "/content/drive/MyDrive/instseg/data/"

In [6]:
from detectron2.data.datasets import register_coco_instances
# Register the three splits as COCO-format datasets named "taco_<split>".
# All of them share one image folder; only the annotation file differs.
for split in ("train", "val", "test"):
    register_coco_instances(
        f"taco_{split}",
        {},
        data_dir_path + f"mapped_annotations_0_{split}.json",
        data_dir_path + "images/",
    )

In [7]:
import json

# Give every annotation in the training split a unique, consecutive id and
# write the file back in place.
#
# Ids start at 1, not 0: pycocotools' COCOeval records matched annotation ids
# in arrays where 0 means "unmatched", so an annotation with id 0 could never
# be counted as matched if this file is ever used for evaluation.
#
# The path is built from data_dir_path so it always refers to the same file
# that is registered as "taco_train".
train_annotations_path = data_dir_path + "mapped_annotations_0_train.json"

with open(train_annotations_path) as f:
    data = json.load(f)

for unique_id, annotation in enumerate(data['annotations'], start=1):
    annotation['id'] = unique_id

with open(train_annotations_path, 'w') as f:
    json.dump(data, f, indent=4)


In [8]:
import os

# Debugging aid: ask CUDA to launch kernels synchronously so a device-side
# error is reported at the call that caused it. This slows training down.
# NOTE(review): torch is already imported at this point; presumably the
# variable still takes effect because CUDA has not been initialised yet -- confirm.
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

In [9]:
from detectron2.evaluation import COCOEvaluator
from detectron2.engine import DefaultTrainer

In [10]:
from detectron2.data import build_detection_train_loader
class Trainer(DefaultTrainer):
    """DefaultTrainer variant whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create the COCO evaluator for ``dataset_name``.

        Args:
            cfg: detectron2 config; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: name of the registered dataset to evaluate on.
            output_folder: directory handed to the evaluator as its output
                directory. When ``None``, ``<cfg.OUTPUT_DIR>/inference`` is used.

        Returns:
            A ``COCOEvaluator`` bound to ``dataset_name``.
        """
        results_dir = (
            os.path.join(cfg.OUTPUT_DIR, "inference")
            if output_folder is None
            else output_folder
        )
        return COCOEvaluator(dataset_name, output_dir=results_dir)

In [11]:
# Base recipe from the detectron2 model zoo; the same name selects both the
# config file and the pretrained checkpoint to fine-tune from.
zoo_config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_config_name))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config_name)

# Data: train on the TACO train split, evaluate on the validation split.
cfg.DATASETS.TRAIN = ("taco_train",)
cfg.DATASETS.TEST = ("taco_val",)
cfg.DATALOADER.NUM_WORKERS = 2

# ROI heads: 10 object categories in this dataset.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

# Solver: 36000 iterations at 2 images per batch, with an empty STEPS list
# (no learning-rate decay milestones configured).
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 36000
cfg.SOLVER.STEPS = []

# Evaluate on cfg.DATASETS.TEST every 9000 iterations (4 times over the run).
cfg.TEST.EVAL_PERIOD = 9000

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
# resume=False: load cfg.MODEL.WEIGHTS and start at iteration 0 instead of
# continuing from a checkpoint in cfg.OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()

[09/08 22:11:00 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

model_final_f10217.pkl: 178MB [00:00, 257MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[09/08 22:11:01 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[09/08 22:11:38 d2.utils.events]:  eta: 15:18:20  iter: 19  total_loss: 4.028  loss_cls: 2.453  loss_box_reg: 0.643  loss_mask: 0.6925  loss_rpn_cls: 0.2057  loss_rpn_loc: 0.05521    time: 1.6442  last_time: 3.2791  data_time: 1.3947  last_data_time: 3.0395   lr: 1.9981e-05  max_mem: 2374M
[09/08 22:12:08 d2.utils.events]:  eta: 14:24:10  iter: 39  total_loss: 3.482  loss_cls: 1.963  loss_box_reg: 0.6313  loss_mask: 0.6824  loss_rpn_cls: 0.06437  loss_rpn_loc: 0.01043    time: 1.4784  last_time: 1.4208  data_time: 1.1490  last_data_time: 1.2539   lr: 3.9961e-05  max_mem: 2374M
[09/08 22:12:35 d2.utils.events]:  eta: 14:23:41  iter: 59  total_loss: 2.287  loss_cls: 1.032  loss_box_reg: 0.4925  loss_mask: 0.674  loss_rpn_cls: 0.06019  loss_rpn_loc: 0.02547    time: 1.4385  last_time: 1.4965  data_time: 1.1942  last_data_time: 1.3487   lr: 5.9941e-05  max_mem: 2374M
[09/08 22:13:02 d2.utils.events]:  eta: 14:10:23  iter: 79  total_loss: 1.941  loss_cls: 0.6617  loss_box_reg: 0.5668  loss_

  self.pid = os.fork()


[09/08 23:19:00 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.2738 s/iter. Inference: 0.0736 s/iter. Eval: 0.2641 s/iter. Total: 0.6115 s/iter. ETA=0:01:24
[09/08 23:19:06 d2.evaluation.evaluator]: Inference done 19/150. Dataloading: 0.4111 s/iter. Inference: 0.0644 s/iter. Eval: 0.2035 s/iter. Total: 0.6791 s/iter. ETA=0:01:28
[09/08 23:19:12 d2.evaluation.evaluator]: Inference done 24/150. Dataloading: 0.4390 s/iter. Inference: 0.0676 s/iter. Eval: 0.3062 s/iter. Total: 0.8130 s/iter. ETA=0:01:42
[09/08 23:19:18 d2.evaluation.evaluator]: Inference done 33/150. Dataloading: 0.3420 s/iter. Inference: 0.0687 s/iter. Eval: 0.3390 s/iter. Total: 0.7499 s/iter. ETA=0:01:27
[09/08 23:19:23 d2.evaluation.evaluator]: Inference done 39/150. Dataloading: 0.3262 s/iter. Inference: 0.0682 s/iter. Eval: 0.3778 s/iter. Total: 0.7724 s/iter. ETA=0:01:25
[09/08 23:19:28 d2.evaluation.evaluator]: Inference done 47/150. Dataloading: 0.2952 s/iter. Inference: 0.0681 s/iter. Eval: 0.382

  self.pid = os.fork()


[09/09 00:18:47 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0070 s/iter. Inference: 0.0582 s/iter. Eval: 0.1571 s/iter. Total: 0.2223 s/iter. ETA=0:00:30
[09/09 00:18:52 d2.evaluation.evaluator]: Inference done 31/150. Dataloading: 0.0428 s/iter. Inference: 0.0589 s/iter. Eval: 0.1497 s/iter. Total: 0.2516 s/iter. ETA=0:00:29
[09/09 00:18:58 d2.evaluation.evaluator]: Inference done 44/150. Dataloading: 0.0411 s/iter. Inference: 0.0597 s/iter. Eval: 0.1992 s/iter. Total: 0.3003 s/iter. ETA=0:00:31
[09/09 00:19:03 d2.evaluation.evaluator]: Inference done 60/150. Dataloading: 0.0353 s/iter. Inference: 0.0599 s/iter. Eval: 0.2102 s/iter. Total: 0.3055 s/iter. ETA=0:00:27
[09/09 00:19:08 d2.evaluation.evaluator]: Inference done 75/150. Dataloading: 0.0295 s/iter. Inference: 0.0600 s/iter. Eval: 0.2226 s/iter. Total: 0.3123 s/iter. ETA=0:00:23
[09/09 00:19:13 d2.evaluation.evaluator]: Inference done 98/150. Dataloading: 0.0257 s/iter. Inference: 0.0595 s/iter. Eval: 0.203

  self.pid = os.fork()


[09/09 01:17:33 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0020 s/iter. Inference: 0.0615 s/iter. Eval: 0.2042 s/iter. Total: 0.2678 s/iter. ETA=0:00:37
[09/09 01:17:39 d2.evaluation.evaluator]: Inference done 27/150. Dataloading: 0.0334 s/iter. Inference: 0.0594 s/iter. Eval: 0.2232 s/iter. Total: 0.3163 s/iter. ETA=0:00:38
[09/09 01:17:44 d2.evaluation.evaluator]: Inference done 42/150. Dataloading: 0.0321 s/iter. Inference: 0.0606 s/iter. Eval: 0.2352 s/iter. Total: 0.3280 s/iter. ETA=0:00:35
[09/09 01:17:49 d2.evaluation.evaluator]: Inference done 60/150. Dataloading: 0.0322 s/iter. Inference: 0.0602 s/iter. Eval: 0.2268 s/iter. Total: 0.3195 s/iter. ETA=0:00:28
[09/09 01:17:55 d2.evaluation.evaluator]: Inference done 74/150. Dataloading: 0.0292 s/iter. Inference: 0.0606 s/iter. Eval: 0.2430 s/iter. Total: 0.3329 s/iter. ETA=0:00:25
[09/09 01:18:00 d2.evaluation.evaluator]: Inference done 97/150. Dataloading: 0.0269 s/iter. Inference: 0.0602 s/iter. Eval: 0.218

  self.pid = os.fork()


[09/09 02:16:36 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0015 s/iter. Inference: 0.0580 s/iter. Eval: 0.1527 s/iter. Total: 0.2122 s/iter. ETA=0:00:29
[09/09 02:16:41 d2.evaluation.evaluator]: Inference done 30/150. Dataloading: 0.0360 s/iter. Inference: 0.0580 s/iter. Eval: 0.1648 s/iter. Total: 0.2590 s/iter. ETA=0:00:31
[09/09 02:16:47 d2.evaluation.evaluator]: Inference done 48/150. Dataloading: 0.0428 s/iter. Inference: 0.0586 s/iter. Eval: 0.1809 s/iter. Total: 0.2824 s/iter. ETA=0:00:28
[09/09 02:16:52 d2.evaluation.evaluator]: Inference done 63/150. Dataloading: 0.0400 s/iter. Inference: 0.0591 s/iter. Eval: 0.2039 s/iter. Total: 0.3032 s/iter. ETA=0:00:26
[09/09 02:16:58 d2.evaluation.evaluator]: Inference done 85/150. Dataloading: 0.0326 s/iter. Inference: 0.0589 s/iter. Eval: 0.1979 s/iter. Total: 0.2895 s/iter. ETA=0:00:18
[09/09 02:17:03 d2.evaluation.evaluator]: Inference done 111/150. Dataloading: 0.0303 s/iter. Inference: 0.0585 s/iter. Eval: 0.18

## Evaluation on TACO10 Test Split

In [12]:
from detectron2.evaluation import inference_on_dataset
from detectron2.data import build_detection_test_loader

# Evaluate the trained model on the held-out test split.
# Note: this re-points cfg.DATASETS.TEST from the validation split to the
# test split for the rest of the session.
test_split_name = "taco_test"
cfg.DATASETS.TEST = (test_split_name,)

# The evaluator writes its result files to ./output.
evaluator = COCOEvaluator(test_split_name, output_dir="output")
test_loader = build_detection_test_loader(cfg, test_split_name)

metrics = inference_on_dataset(trainer.model, test_loader, evaluator)
print(metrics)

[09/09 02:17:14 d2.evaluation.coco_evaluation]: Fast COCO eval is not built. Falling back to official COCO eval.
[09/09 02:17:15 d2.data.datasets.coco]: Loaded 150 images in COCO format from /content/drive/MyDrive/instseg/data/mapped_annotations_0_test.json
[09/09 02:17:15 d2.data.build]: Distribution of instances among all 10 categories:
|   category    | #instances   |  category  | #instances   |  category  | #instances   |
|:-------------:|:-------------|:----------:|:-------------|:----------:|:-------------|
|      Can      | 37           |   Other    | 151          |   Bottle   | 54           |
|  Bottle cap   | 41           |    Cup     | 17           |    Lid     | 15           |
| Plastic bag.. | 84           |  Pop tab   | 11           |   Straw    | 38           |
|   Cigarette   | 121          |            |              |            |              |
|     total     | 569          |            |              |            |              |
[09/09 02:17:15 d2.data.dataset_mapp

  self.pid = os.fork()


[09/09 02:17:23 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.3705 s/iter. Inference: 0.0570 s/iter. Eval: 0.1636 s/iter. Total: 0.5912 s/iter. ETA=0:01:22
[09/09 02:17:30 d2.evaluation.evaluator]: Inference done 21/150. Dataloading: 0.4694 s/iter. Inference: 0.0560 s/iter. Eval: 0.1725 s/iter. Total: 0.6981 s/iter. ETA=0:01:30
[09/09 02:17:36 d2.evaluation.evaluator]: Inference done 29/150. Dataloading: 0.4213 s/iter. Inference: 0.0569 s/iter. Eval: 0.2380 s/iter. Total: 0.7164 s/iter. ETA=0:01:26
[09/09 02:17:44 d2.evaluation.evaluator]: Inference done 37/150. Dataloading: 0.4277 s/iter. Inference: 0.0588 s/iter. Eval: 0.2898 s/iter. Total: 0.7766 s/iter. ETA=0:01:27
[09/09 02:17:49 d2.evaluation.evaluator]: Inference done 46/150. Dataloading: 0.3627 s/iter. Inference: 0.0597 s/iter. Eval: 0.3080 s/iter. Total: 0.7307 s/iter. ETA=0:01:15
[09/09 02:17:54 d2.evaluation.evaluator]: Inference done 52/150. Dataloading: 0.3951 s/iter. Inference: 0.0596 s/iter. Eval: 0.294

  self.pid = os.fork()


[09/09 02:19:06 d2.evaluation.evaluator]: Total inference time: 0:01:46.916652 (0.737356 s / iter per device, on 1 devices)
[09/09 02:19:06 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:08 (0.058153 s / iter per device, on 1 devices)
[09/09 02:19:06 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[09/09 02:19:06 d2.evaluation.coco_evaluation]: Saving results to output/coco_instances_results.json
[09/09 02:19:06 d2.evaluation.coco_evaluation]: Evaluating predictions with official COCO API...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.18s).
Accumulating evaluation results...
DONE (t=0.09s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.239
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.348
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.246
 Average P

In [13]:
# Persist the trained model to Drive. The whole module object is saved, not
# just its state_dict.
# NOTE(review): loading such a file presumably needs the same detectron2
# classes to be importable -- confirm before relying on it elsewhere.
full_model_path = os.path.join("/content/drive/MyDrive/instseg/", "maskrcnnfull.pth")
torch.save(trainer.model, full_model_path)