In [None]:
# Mount Google Drive at /content/drive. The dataset path used further down
# (/content/drive/MyDrive/instseg/data/) lives under this mount point, so this
# cell has to run before any cell that reads the annotations or images.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# pyyaml is installed first, before detectron2's setup.py is evaluated below.
!python -m pip install pyyaml
# NOTE(review): distutils is deprecated (PEP 632) and no longer part of the standard
# library from Python 3.12 on; confirm the runtime still provides it (e.g. via setuptools).
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
# Step 1: fetch the detectron2 sources into ./detectron2.
!git clone 'https://github.com/facebookresearch/detectron2'
# Step 2: evaluate setup.py only to read its dependency list (dist.install_requires).
dist = distutils.core.run_setup("./detectron2/setup.py")
# Step 3: pip-install every listed requirement, each one individually quoted for the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Step 4: make the cloned checkout importable by putting it first on sys.path
# (the package itself is not pip-installed on this path).
sys.path.insert(0, os.path.abspath('./detectron2'))

# Alternative: properly install detectron2 with pip. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [3]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
torch:  2.3 ; cuda:  cu121
detectron2: 0.6


In [4]:
# Basic setup.
# Initialise detectron2's logger up front (called with no arguments, i.e. with the
# library's default settings) so that later detectron2 calls can log through it.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [5]:
# Root folder on the mounted Drive that holds the mapped_annotations_0_*.json files
# and the images/ directory. The trailing slash is kept so that appending a file
# name to this prefix by plain string concatenation yields a valid path.
data_dir_path = "/content/drive/MyDrive/instseg/data/"

In [6]:
import os

from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Register the three TACO splits as COCO-format datasets named taco_train, taco_val
# and taco_test. Every split has its own annotation file but shares one images/ folder.
for split in ("train", "val", "test"):
    dataset_name = f"taco_{split}"
    # detectron2 raises if a dataset name is registered twice. Skipping names that are
    # already present keeps this cell re-runnable without restarting the kernel.
    # (After changing data_dir_path, restart the kernel so the new paths are picked up.)
    if dataset_name in DatasetCatalog.list():
        continue
    register_coco_instances(
        dataset_name,
        {},  # no extra metadata
        # os.path.join works whether or not data_dir_path ends with a slash.
        os.path.join(data_dir_path, f"mapped_annotations_0_{split}.json"),
        os.path.join(data_dir_path, "images/"),
    )

In [7]:
import json
import os


def reassign_annotation_ids(json_path, first_id=1):
    """Give every annotation in a COCO-style JSON file a unique, sequential id.

    The file is read, each entry of its ``"annotations"`` list gets
    ``id = first_id, first_id + 1, ...`` in list order, and the result is written
    back to the same path (indented by 4 spaces). Running it again produces the
    same file, so the operation is idempotent.

    Args:
        json_path: Path of the annotation file to rewrite in place.
        first_id: Id assigned to the first annotation. Defaults to 1 rather than 0
            because pycocotools' evaluation stores the matched ground-truth id in a
            zero-initialised array and treats 0 as "no match", so an annotation
            with id 0 could never count as a true positive if this file were used
            as evaluation ground truth.

    Returns:
        The parsed JSON content (a dict) with the updated annotation ids.

    Raises:
        OSError: If the file cannot be read or written.
        KeyError: If the JSON document has no ``"annotations"`` entry.
    """
    with open(json_path) as f:
        coco = json.load(f)

    for new_id, annotation in enumerate(coco["annotations"], start=first_id):
        annotation["id"] = new_id

    # Overwrite the original file with the corrected ids.
    with open(json_path, "w") as f:
        json.dump(coco, f, indent=4)
    return coco


# Only the training annotations are rewritten here. The path is built from
# data_dir_path so it stays in sync with the dataset registration instead of
# repeating the absolute Drive path.
data = reassign_annotation_ids(os.path.join(data_dir_path, "mapped_annotations_0_train.json"))


In [8]:
# Set CUDA_LAUNCH_BLOCKING=1 in this process's environment before training starts.
# NOTE(review): per the CUDA documentation this makes kernel launches synchronous,
# which is a debugging aid (errors surface at the offending call) and usually slows
# training down. Confirm it is still wanted; it presumably also has to be set
# before the first CUDA call in this process to take effect.
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [9]:
from detectron2.evaluation import COCOEvaluator
from detectron2.engine import DefaultTrainer

In [10]:
from detectron2.data import build_detection_train_loader
class Trainer(DefaultTrainer):
    """DefaultTrainer subclass that supplies a COCOEvaluator for evaluation."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Build the evaluator for ``dataset_name``.

        Args:
            cfg: Config node; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: Name of the dataset to evaluate on.
            output_folder: Directory handed to the evaluator as ``output_dir``.
                When None, ``<cfg.OUTPUT_DIR>/inference`` is used.

        Returns:
            A ``COCOEvaluator`` for ``dataset_name``.
        """
        results_dir = (
            os.path.join(cfg.OUTPUT_DIR, "inference")
            if output_folder is None
            else output_folder
        )
        return COCOEvaluator(dataset_name, output_dir=results_dir)

In [11]:
# One model-zoo entry provides both the architecture config and the checkpoint
# used to initialise the weights, so the name is spelled out once.
MASK_RCNN_ZOO_ENTRY = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MASK_RCNN_ZOO_ENTRY))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MASK_RCNN_ZOO_ENTRY)

# --- Data: train on the registered train split, evaluate on the val split ---
cfg.DATASETS.TRAIN = ("taco_train",)
cfg.DATASETS.TEST = ("taco_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.TEST.EVAL_PERIOD = 100  # evaluation interval, in iterations

# --- Solver: a short fine-tuning run ---
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 500
cfg.SOLVER.STEPS = []  # empty list: no learning-rate step milestones are configured

# --- ROI heads ---
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
# NOTE(review): assumes the mapped annotation files define exactly 10 categories; confirm.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10

# The output directory must exist before the trainer is constructed and run.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = Trainer(cfg)
# resume=False: do not resume from an earlier run in OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()

[07/25 07:01:40 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

model_final_f10217.pkl: 178MB [00:02, 79.5MB/s]                          
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[07/25 07:01:43 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[07/25 07:02:18 d2.utils.events]:  eta: 0:09:31  iter: 19  total_loss: 3.847  loss_cls: 2.245  loss_box_reg: 0.6365  loss_mask: 0.6904  loss_rpn_cls: 0.1009  loss_rpn_loc: 0.05004    time: 1.4038  last_time: 1.0003  data_time: 0.8252  last_data_time: 0.4354   lr: 3.8962e-05  max_mem: 2576M
[07/25 07:02:46 d2.utils.events]:  eta: 0:08:43  iter: 39  total_loss: 3.097  loss_cls: 1.321  loss_box_reg: 0.564  loss_mask: 0.6762  loss_rpn_cls: 0.04346  loss_rpn_loc: 0.02167    time: 1.2940  last_time: 3.6494  data_time: 0.6154  last_data_time: 2.7406   lr: 7.8922e-05  max_mem: 2669M
[07/25 07:03:10 d2.utils.events]:  eta: 0:08:46  iter: 59  total_loss: 2.091  loss_cls: 0.6827  loss_box_reg: 0.5158  loss_mask: 0.6479  loss_rpn_cls: 0.06301  loss_rpn_loc: 0.02084    time: 1.2582  last_time: 1.5014  data_time: 0.6805  last_data_time: 1.0571   lr: 0.00011888  max_mem: 2669M
[07/25 07:03:36 d2.utils.events]:  eta: 0:08:24  iter: 79  total_loss: 1.647  loss_cls: 0.533  loss_box_reg: 0.4881  loss_mas

  self.pid = os.fork()


[07/25 07:04:27 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0016 s/iter. Inference: 0.3794 s/iter. Eval: 1.8535 s/iter. Total: 2.2345 s/iter. ETA=0:05:10
[07/25 07:04:34 d2.evaluation.evaluator]: Inference done 14/150. Dataloading: 0.0025 s/iter. Inference: 0.3609 s/iter. Eval: 1.8653 s/iter. Total: 2.2293 s/iter. ETA=0:05:03
[07/25 07:04:41 d2.evaluation.evaluator]: Inference done 19/150. Dataloading: 0.0024 s/iter. Inference: 0.3147 s/iter. Eval: 1.5938 s/iter. Total: 1.9114 s/iter. ETA=0:04:10
[07/25 07:04:46 d2.evaluation.evaluator]: Inference done 21/150. Dataloading: 0.0027 s/iter. Inference: 0.3205 s/iter. Eval: 1.7018 s/iter. Total: 2.0256 s/iter. ETA=0:04:21
[07/25 07:04:54 d2.evaluation.evaluator]: Inference done 23/150. Dataloading: 0.0026 s/iter. Inference: 0.3389 s/iter. Eval: 1.8685 s/iter. Total: 2.2109 s/iter. ETA=0:04:40
[07/25 07:05:02 d2.evaluation.evaluator]: Inference done 24/150. Dataloading: 0.0026 s/iter. Inference: 0.3771 s/iter. Eval: 2.122

  self.pid = os.fork()


[07/25 07:12:34 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0018 s/iter. Inference: 0.3920 s/iter. Eval: 1.7096 s/iter. Total: 2.1034 s/iter. ETA=0:04:52
[07/25 07:12:41 d2.evaluation.evaluator]: Inference done 14/150. Dataloading: 0.0021 s/iter. Inference: 0.3942 s/iter. Eval: 1.7649 s/iter. Total: 2.1617 s/iter. ETA=0:04:53
[07/25 07:12:48 d2.evaluation.evaluator]: Inference done 19/150. Dataloading: 0.0022 s/iter. Inference: 0.3445 s/iter. Eval: 1.5327 s/iter. Total: 1.8798 s/iter. ETA=0:04:06
[07/25 07:12:54 d2.evaluation.evaluator]: Inference done 21/150. Dataloading: 0.0029 s/iter. Inference: 0.3613 s/iter. Eval: 1.6708 s/iter. Total: 2.0355 s/iter. ETA=0:04:22
[07/25 07:13:04 d2.evaluation.evaluator]: Inference done 23/150. Dataloading: 0.0034 s/iter. Inference: 0.4019 s/iter. Eval: 1.9478 s/iter. Total: 2.3539 s/iter. ETA=0:04:58
[07/25 07:13:13 d2.evaluation.evaluator]: Inference done 24/150. Dataloading: 0.0033 s/iter. Inference: 0.4271 s/iter. Eval: 2.262

  self.pid = os.fork()


[07/25 07:18:28 d2.evaluation.evaluator]: Total inference time: 0:06:06.157393 (2.525223 s / iter per device, on 1 devices)
[07/25 07:18:28 d2.evaluation.evaluator]: Total inference pure compute time: 0:01:00 (0.415340 s / iter per device, on 1 devices)
[07/25 07:18:28 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[07/25 07:18:28 d2.evaluation.coco_evaluation]: Saving results to ./output/inference/coco_instances_results.json
[07/25 07:18:28 d2.evaluation.coco_evaluation]: Evaluating predictions with official COCO API...
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.51s).
Accumulating evaluation results...
DONE (t=0.21s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.033
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.075
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.01

  self.pid = os.fork()


[07/25 07:20:51 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0018 s/iter. Inference: 0.3778 s/iter. Eval: 1.7150 s/iter. Total: 2.0946 s/iter. ETA=0:04:51
[07/25 07:20:57 d2.evaluation.evaluator]: Inference done 14/150. Dataloading: 0.0035 s/iter. Inference: 0.3660 s/iter. Eval: 1.7029 s/iter. Total: 2.0728 s/iter. ETA=0:04:41
[07/25 07:21:04 d2.evaluation.evaluator]: Inference done 19/150. Dataloading: 0.0037 s/iter. Inference: 0.3506 s/iter. Eval: 1.4802 s/iter. Total: 1.8349 s/iter. ETA=0:04:00
[07/25 07:21:09 d2.evaluation.evaluator]: Inference done 22/150. Dataloading: 0.0034 s/iter. Inference: 0.3454 s/iter. Eval: 1.4771 s/iter. Total: 1.8262 s/iter. ETA=0:03:53
[07/25 07:21:25 d2.evaluation.evaluator]: Inference done 23/150. Dataloading: 0.0033 s/iter. Inference: 0.3750 s/iter. Eval: 2.2249 s/iter. Total: 2.6037 s/iter. ETA=0:05:30
[07/25 07:21:40 d2.evaluation.evaluator]: Inference done 24/150. Dataloading: 0.0033 s/iter. Inference: 0.4016 s/iter. Eval: 2.872

  self.pid = os.fork()


[07/25 07:26:48 d2.evaluation.evaluator]: Total inference time: 0:06:10.366819 (2.554254 s / iter per device, on 1 devices)
[07/25 07:26:48 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:59 (0.408918 s / iter per device, on 1 devices)
[07/25 07:26:49 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[07/25 07:26:49 d2.evaluation.coco_evaluation]: Saving results to ./output/inference/coco_instances_results.json
[07/25 07:26:49 d2.evaluation.coco_evaluation]: Evaluating predictions with official COCO API...
Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.86s).
Accumulating evaluation results...
DONE (t=0.42s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.066
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.127
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.06

  self.pid = os.fork()


[07/25 07:29:02 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0035 s/iter. Inference: 0.2894 s/iter. Eval: 1.1928 s/iter. Total: 1.4857 s/iter. ETA=0:03:26
[07/25 07:29:07 d2.evaluation.evaluator]: Inference done 17/150. Dataloading: 0.0038 s/iter. Inference: 0.2514 s/iter. Eval: 0.9321 s/iter. Total: 1.1879 s/iter. ETA=0:02:37
[07/25 07:29:13 d2.evaluation.evaluator]: Inference done 22/150. Dataloading: 0.0035 s/iter. Inference: 0.2490 s/iter. Eval: 0.9372 s/iter. Total: 1.1901 s/iter. ETA=0:02:32
[07/25 07:29:22 d2.evaluation.evaluator]: Inference done 23/150. Dataloading: 0.0036 s/iter. Inference: 0.2792 s/iter. Eval: 1.3299 s/iter. Total: 1.6132 s/iter. ETA=0:03:24
[07/25 07:29:29 d2.evaluation.evaluator]: Inference done 24/150. Dataloading: 0.0036 s/iter. Inference: 0.3096 s/iter. Eval: 1.5707 s/iter. Total: 1.8846 s/iter. ETA=0:03:57
[07/25 07:29:36 d2.evaluation.evaluator]: Inference done 27/150. Dataloading: 0.0036 s/iter. Inference: 0.3140 s/iter. Eval: 1.633

  self.pid = os.fork()


[07/25 07:33:15 d2.evaluation.evaluator]: Total inference time: 0:04:22.346486 (1.809286 s / iter per device, on 1 devices)
[07/25 07:33:15 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:45 (0.316889 s / iter per device, on 1 devices)
[07/25 07:33:15 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[07/25 07:33:15 d2.evaluation.coco_evaluation]: Saving results to ./output/inference/coco_instances_results.json
[07/25 07:33:15 d2.evaluation.coco_evaluation]: Evaluating predictions with official COCO API...
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.38s).
Accumulating evaluation results...
DONE (t=0.19s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.116
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.195
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.13

  self.pid = os.fork()


[07/25 07:35:38 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0033 s/iter. Inference: 0.2963 s/iter. Eval: 1.0158 s/iter. Total: 1.3154 s/iter. ETA=0:03:02
[07/25 07:35:43 d2.evaluation.evaluator]: Inference done 19/150. Dataloading: 0.0045 s/iter. Inference: 0.2366 s/iter. Eval: 0.6858 s/iter. Total: 0.9272 s/iter. ETA=0:02:01
[07/25 07:35:49 d2.evaluation.evaluator]: Inference done 23/150. Dataloading: 0.0047 s/iter. Inference: 0.2430 s/iter. Eval: 0.8129 s/iter. Total: 1.0611 s/iter. ETA=0:02:14
[07/25 07:35:56 d2.evaluation.evaluator]: Inference done 24/150. Dataloading: 0.0047 s/iter. Inference: 0.2662 s/iter. Eval: 1.0795 s/iter. Total: 1.3511 s/iter. ETA=0:02:50
[07/25 07:36:02 d2.evaluation.evaluator]: Inference done 29/150. Dataloading: 0.0041 s/iter. Inference: 0.2580 s/iter. Eval: 1.0709 s/iter. Total: 1.3337 s/iter. ETA=0:02:41
[07/25 07:36:09 d2.evaluation.evaluator]: Inference done 31/150. Dataloading: 0.0040 s/iter. Inference: 0.2646 s/iter. Eval: 1.223

  self.pid = os.fork()


[07/25 07:38:33 d2.evaluation.evaluator]: Total inference time: 0:03:02.632409 (1.259534 s / iter per device, on 1 devices)
[07/25 07:38:33 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:37 (0.258074 s / iter per device, on 1 devices)
[07/25 07:38:33 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[07/25 07:38:33 d2.evaluation.coco_evaluation]: Saving results to ./output/inference/coco_instances_results.json
[07/25 07:38:33 d2.evaluation.coco_evaluation]: Evaluating predictions with official COCO API...
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.55s).
Accumulating evaluation results...
DONE (t=0.27s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.140
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.220
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.16