In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used by later cells
# (/content/drive/MyDrive/...) only resolve once this call has succeeded.
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Lightweight, source-checkout install of detectron2 for this Colab session.
# Steps: install pyyaml, clone the repo, read its declared requirements from
# setup.py, pip-install those, then put the checkout on sys.path so that
# `import detectron2` in the following cells resolves to ./detectron2.
# NOTE(review): the clone is not pinned to a tag or commit, so a re-run may pick up
# a different detectron2 revision than the one that produced the saved outputs.
!python -m pip install pyyaml
# NOTE(review): distutils is no longer shipped with newer Python releases; this import
# presumably relies on the setuptools-provided shim — confirm on the current Colab runtime.
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
# Run setup.py only to obtain the distribution object; its install_requires list is used below.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Each requirement is wrapped in single quotes before being passed to the shell,
# which keeps version specifiers containing characters such as '<' or '>' intact.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Prepend the checkout so it takes precedence over any other detectron2 on the path.
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
torch:  2.2 ; cuda:  cu121
detectron2: 0.6


In [None]:
# Some basic setup:
# Set up the detectron2 logger before the rest of the notebook uses the library.
# NOTE(review): presumably this is what produces the `[... d2.*]:` lines seen in
# later cell outputs — confirm.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()  # called with no arguments, i.e. whatever defaults the library applies

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [None]:
# Root folder on the mounted Drive that holds the annotation JSON files and the
# images/ folder. The trailing slash is required: later cells build file paths by
# plain string concatenation (data_dir_path + "annotations_0_train.json", ...).
data_dir_path = "/content/drive/MyDrive/instseg/data/"

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances

# Register the three TACO splits (train / val / test) as COCO-format datasets.
# All splits share one image folder and differ only in their annotation file.
#
# Registering a name that already exists makes detectron2 raise an
# "already registered" assertion, so re-running this cell in a live kernel used
# to fail. Any stale entry is therefore dropped first, which makes the cell
# idempotent and lets a changed data_dir_path take effect on re-run.
for split in ("train", "val", "test"):
    dataset_name = f"taco_{split}"
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    if dataset_name in MetadataCatalog.list():
        MetadataCatalog.remove(dataset_name)
    register_coco_instances(
        dataset_name,
        {},  # no extra metadata
        data_dir_path + f"annotations_0_{split}.json",
        data_dir_path + "images/",
    )

In [None]:
import json
import os


def renumber_annotation_ids(annotation_path, first_id=1):
    """Give every annotation in a COCO JSON file a unique, consecutive id.

    The file is read, each entry of its ``'annotations'`` list gets
    ``id = first_id, first_id + 1, ...`` in list order, and the result is
    written back to the same path (``indent=4``).

    Ids start at 1 by default rather than 0: pycocotools' evaluator records the
    matched ground-truth id in an array where 0 means "no match", so an
    annotation whose id is 0 could never be counted as a true positive.

    The new content is written to ``<annotation_path>.tmp`` first and then
    swapped in with ``os.replace``, so an interrupted write cannot leave the
    only copy of the annotations truncated.

    Args:
        annotation_path: path of the COCO annotation JSON file to rewrite.
        first_id: id assigned to the first annotation (default 1).

    Returns:
        The loaded (and updated) COCO dictionary.

    Raises:
        OSError: if the file cannot be read or written.
        KeyError: if the JSON has no ``'annotations'`` key.
    """
    with open(annotation_path) as f:
        coco = json.load(f)

    for new_id, annotation in enumerate(coco['annotations'], start=first_id):
        annotation['id'] = new_id

    tmp_path = f"{annotation_path}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(coco, f, indent=4)
    os.replace(tmp_path, annotation_path)
    return coco


# Only the training split is rewritten here, as before. The path is built from
# data_dir_path so it stays in sync with the dataset registration cell.
# `data` is kept as the name of the loaded dictionary.
data = renumber_annotation_ids(data_dir_path + "annotations_0_train.json")


In [None]:
import os
# Sets CUDA_LAUNCH_BLOCKING=1 in this process's environment for everything that runs afterwards.
# NOTE(review): this is normally a debugging switch and is expected to slow GPU work down;
# it looks like a leftover from diagnosing a CUDA error — confirm whether it is still needed
# before a real training run.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
from detectron2.evaluation import COCOEvaluator
from detectron2.engine import DefaultTrainer

In [None]:
class Trainer(DefaultTrainer):
    """DefaultTrainer subclass whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        Args:
            cfg: config node; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: name of the registered dataset to evaluate on.
            output_folder: directory handed to the evaluator as ``output_dir``.
                When ``None``, ``<cfg.OUTPUT_DIR>/inference`` is used instead.
        """
        results_dir = (
            output_folder
            if output_folder is not None
            else os.path.join(cfg.OUTPUT_DIR, "inference")
        )
        return COCOEvaluator(dataset_name, output_dir=results_dir)

In [None]:
# --- Base recipe: Mask R-CNN R50-FPN (3x) from the model zoo, config and weights ---
zoo_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_yaml))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_yaml)  # Let training initialize from model zoo

# --- Data: train on taco_train, evaluate on taco_val every 100 iterations ---
cfg.DATASETS.TRAIN = ("taco_train",)
cfg.DATASETS.TEST = ("taco_val",)
cfg.TEST.EVAL_PERIOD = 100
cfg.DATALOADER.NUM_WORKERS = 2

# --- ROI heads ---
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
# NOTE(review): 60 is presumably the number of categories in the TACO annotation
# files — verify against the 'categories' list of the JSON.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 60

# --- Solver: short 300-iteration run, 2 images per batch, empty STEPS list ---
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 300
cfg.SOLVER.STEPS = []

# --- Run: create the output directory, then train from the zoo weights (no resume) ---
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[05/13 11:25:25 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[05/13 11:25:26 d2.engine.train_loop]: Starting training from iteration 0


  self.pid = os.fork()


[05/13 11:25:48 d2.utils.events]:  eta: 0:03:58  iter: 19  total_loss: 5.794  loss_cls: 4.188  loss_box_reg: 0.71  loss_mask: 0.6921  loss_rpn_cls: 0.2146  loss_rpn_loc: 0.03303    time: 1.0612  last_time: 1.2036  data_time: 0.3982  last_data_time: 0.6644   lr: 1.6068e-05  max_mem: 3261M
[05/13 11:26:15 d2.utils.events]:  eta: 0:04:05  iter: 39  total_loss: 5.245  loss_cls: 3.733  loss_box_reg: 0.5451  loss_mask: 0.6892  loss_rpn_cls: 0.1399  loss_rpn_loc: 0.02677    time: 1.2154  last_time: 0.7119  data_time: 0.6528  last_data_time: 0.1878   lr: 3.2718e-05  max_mem: 3263M
[05/13 11:26:37 d2.utils.events]:  eta: 0:03:58  iter: 59  total_loss: 3.95  loss_cls: 2.57  loss_box_reg: 0.5504  loss_mask: 0.6884  loss_rpn_cls: 0.02641  loss_rpn_loc: 0.01063    time: 1.1728  last_time: 1.0533  data_time: 0.3952  last_data_time: 0.0018   lr: 4.9367e-05  max_mem: 3353M
[05/13 11:26:59 d2.utils.events]:  eta: 0:03:42  iter: 79  total_loss: 2.425  loss_cls: 1.116  loss_box_reg: 0.5834  loss_mask: 0.

  self.pid = os.fork()


[05/13 11:27:26 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.3174 s/iter. Inference: 0.1458 s/iter. Eval: 0.0001 s/iter. Total: 0.4633 s/iter. ETA=0:01:04
[05/13 11:27:31 d2.evaluation.evaluator]: Inference done 20/150. Dataloading: 0.3622 s/iter. Inference: 0.1691 s/iter. Eval: 0.0001 s/iter. Total: 0.5315 s/iter. ETA=0:01:09
[05/13 11:27:36 d2.evaluation.evaluator]: Inference done 25/150. Dataloading: 0.4695 s/iter. Inference: 0.2080 s/iter. Eval: 0.0001 s/iter. Total: 0.6782 s/iter. ETA=0:01:24
[05/13 11:27:41 d2.evaluation.evaluator]: Inference done 30/150. Dataloading: 0.5236 s/iter. Inference: 0.2220 s/iter. Eval: 0.0001 s/iter. Total: 0.7466 s/iter. ETA=0:01:29
[05/13 11:27:48 d2.evaluation.evaluator]: Inference done 38/150. Dataloading: 0.5420 s/iter. Inference: 0.2117 s/iter. Eval: 0.0001 s/iter. Total: 0.7547 s/iter. ETA=0:01:24
[05/13 11:27:53 d2.evaluation.evaluator]: Inference done 46/150. Dataloading: 0.5275 s/iter. Inference: 0.2181 s/iter. Eval: 0.000

  self.pid = os.fork()


[05/13 11:30:43 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0901 s/iter. Inference: 0.2446 s/iter. Eval: 0.0001 s/iter. Total: 0.3348 s/iter. ETA=0:00:46
[05/13 11:30:48 d2.evaluation.evaluator]: Inference done 21/150. Dataloading: 0.1261 s/iter. Inference: 0.3379 s/iter. Eval: 0.0005 s/iter. Total: 0.4651 s/iter. ETA=0:01:00
[05/13 11:30:54 d2.evaluation.evaluator]: Inference done 27/150. Dataloading: 0.2573 s/iter. Inference: 0.3145 s/iter. Eval: 0.0004 s/iter. Total: 0.5734 s/iter. ETA=0:01:10
[05/13 11:30:59 d2.evaluation.evaluator]: Inference done 38/150. Dataloading: 0.2493 s/iter. Inference: 0.2854 s/iter. Eval: 0.0003 s/iter. Total: 0.5358 s/iter. ETA=0:01:00
[05/13 11:31:05 d2.evaluation.evaluator]: Inference done 47/150. Dataloading: 0.2765 s/iter. Inference: 0.2891 s/iter. Eval: 0.0003 s/iter. Total: 0.5666 s/iter. ETA=0:00:58
[05/13 11:31:10 d2.evaluation.evaluator]: Inference done 54/150. Dataloading: 0.2800 s/iter. Inference: 0.3104 s/iter. Eval: 0.000

  self.pid = os.fork()


[05/13 11:33:51 d2.evaluation.evaluator]: Inference done 11/150. Dataloading: 0.0018 s/iter. Inference: 0.2251 s/iter. Eval: 0.3592 s/iter. Total: 0.5862 s/iter. ETA=0:01:21
[05/13 11:33:57 d2.evaluation.evaluator]: Inference done 18/150. Dataloading: 0.0174 s/iter. Inference: 0.3148 s/iter. Eval: 0.4173 s/iter. Total: 0.7503 s/iter. ETA=0:01:39
[05/13 11:34:05 d2.evaluation.evaluator]: Inference done 24/150. Dataloading: 0.0484 s/iter. Inference: 0.3342 s/iter. Eval: 0.5732 s/iter. Total: 0.9566 s/iter. ETA=0:02:00
[05/13 11:34:12 d2.evaluation.evaluator]: Inference done 30/150. Dataloading: 0.0380 s/iter. Inference: 0.3319 s/iter. Eval: 0.6313 s/iter. Total: 1.0018 s/iter. ETA=0:02:00
[05/13 11:34:18 d2.evaluation.evaluator]: Inference done 34/150. Dataloading: 0.0330 s/iter. Inference: 0.3353 s/iter. Eval: 0.6805 s/iter. Total: 1.0497 s/iter. ETA=0:02:01
[05/13 11:34:23 d2.evaluation.evaluator]: Inference done 38/150. Dataloading: 0.0293 s/iter. Inference: 0.3266 s/iter. Eval: 0.719

  self.pid = os.fork()


[05/13 11:35:52 d2.evaluation.evaluator]: Total inference time: 0:02:05.018587 (0.862197 s / iter per device, on 1 devices)
[05/13 11:35:52 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:40 (0.281371 s / iter per device, on 1 devices)
[05/13 11:35:52 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[05/13 11:35:52 d2.evaluation.coco_evaluation]: Saving results to ./output/inference/coco_instances_results.json
[05/13 11:35:52 d2.evaluation.coco_evaluation]: Evaluating predictions with official COCO API...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.79s).
Accumulating evaluation results...
DONE (t=0.32s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.005
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.00