In [1]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

import matplotlib.pyplot as plt

from detectron2.evaluation.coco_evaluation import COCOEvaluator

In [2]:
import torch, detectron2

TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

torch:  1.13 ; cuda:  cu117
detectron2: 0.6


In [3]:
######################### Load Weights ############################

In [4]:
if not "resnet50-0676ba61.pth" in os.listdir():
    !wget https://download.pytorch.org/models/resnet50-0676ba61.pth

In [5]:
if not "resnet50-0676ba61.pkl" in os.listdir():
    !python ./detectron2/tools/convert-torchvision-to-d2.py \
        resnet50-0676ba61.pth \
        resnet50-0676ba61.pkl

In [6]:
!python ./detectron2/tools/convert-torchvision-to-d2.py \
        tempo50_95_2.pth \
        tempo50_95_2.pkl

conv1.weight -> stem.conv1.weight
bn1.weight -> stem.conv1.norm.weight
bn1.bias -> stem.conv1.norm.bias
bn1.running_mean -> stem.conv1.norm.running_mean
bn1.running_var -> stem.conv1.norm.running_var
bn1.num_batches_tracked -> stem.conv1.norm.num_batches_tracked
layer1.0.conv1.weight -> res2.0.conv1.weight
layer1.0.bn1.weight -> res2.0.conv1.norm.weight
layer1.0.bn1.bias -> res2.0.conv1.norm.bias
layer1.0.bn1.running_mean -> res2.0.conv1.norm.running_mean
layer1.0.bn1.running_var -> res2.0.conv1.norm.running_var
layer1.0.bn1.num_batches_tracked -> res2.0.conv1.norm.num_batches_tracked
layer1.0.conv2.weight -> res2.0.conv2.weight
layer1.0.bn2.weight -> res2.0.conv2.norm.weight
layer1.0.bn2.bias -> res2.0.conv2.norm.bias
layer1.0.bn2.running_mean -> res2.0.conv2.norm.running_mean
layer1.0.bn2.running_var -> res2.0.conv2.norm.running_var
layer1.0.bn2.num_batches_tracked -> res2.0.conv2.norm.num_batches_tracked
layer1.0.conv3.weight -> res2.0.conv3.weight
layer1.0.bn3.weight -> res2.0.conv

In [7]:
###################################################################

In [8]:
# if your dataset is in COCO format, this cell can be replaced by the following three lines:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("asl_train", {}, "../datasets/ASL_mask/annotations/instances_Train.json", "../datasets/ASL_mask/images")
register_coco_instances("asl_val", {}, "../datasets/ASL_mask/annotations/instances_Test.json", "../datasets/ASL_mask/images")

In [9]:
asl_metadata = MetadataCatalog.get("asl_train")
dataset_dicts = DatasetCatalog.get("asl_train")

[05/30 19:18:48 d2.data.datasets.coco]: Loaded 120 images in COCO format from ../datasets/ASL_mask/annotations/instances_Train.json


In [10]:
for d in random.sample(dataset_dicts, 10):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=asl_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2.imshow("", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)
cv2.destroyAllWindows()

QObject::moveToThread: Current thread (0x7840210) is not the object's thread (0x7aa1bd0).
Cannot move to target thread (0x7840210)

QObject::moveToThread: Current thread (0x7840210) is not the object's thread (0x7aa1bd0).
Cannot move to target thread (0x7840210)

QObject::moveToThread: Current thread (0x7840210) is not the object's thread (0x7aa1bd0).
Cannot move to target thread (0x7840210)

QObject::moveToThread: Current thread (0x7840210) is not the object's thread (0x7aa1bd0).
Cannot move to target thread (0x7840210)

QObject::moveToThread: Current thread (0x7840210) is not the object's thread (0x7aa1bd0).
Cannot move to target thread (0x7840210)

QObject::moveToThread: Current thread (0x7840210) is not the object's thread (0x7aa1bd0).
Cannot move to target thread (0x7840210)

QObject::moveToThread: Current thread (0x7840210) is not the object's thread (0x7aa1bd0).
Cannot move to target thread (0x7840210)

QObject::moveToThread: Current thread (0x7840210) is not the object's thread

In [11]:
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

cfg.INPUT.MASK_FORMAT='bitmask'

cfg.DATASETS.TRAIN = ("asl_train",)
cfg.DATASETS.TEST = ("asl_val",)
cfg.TEST.EVAL_PERIOD = 1_000

cfg.DATALOADER.NUM_WORKERS = 2
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
# cfg.MODEL.WEIGHTS = "resnet50-0676ba61.pkl"
# cfg.MODEL.WEIGHTS = "tempo50_95.pkl"
cfg.MODEL.WEIGHTS = "baseline50.pkl"

cfg.OUTPUT_DIR = "./output_baseline_final/4"
cfg.SOLVER.CHECKPOINT_PERIOD = 1_000

cfg.MODEL.PIXEL_MEAN = [123.675, 116.280, 103.530]
cfg.MODEL.PIXEL_STD = [58.395, 57.120, 57.375]
cfg.MODEL.RESNETS.DEPTH = 50
cfg.MODEL.RESNETS.STRIDE_IN_1X1 = False
cfg.INPUT.FORMAT = "RGB"

cfg.SOLVER.IMS_PER_BATCH = 5  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 15_000    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 24  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

In [12]:
cfg.DATASETS

CfgNode({'TRAIN': ('asl_train',), 'PROPOSAL_FILES_TRAIN': (), 'PRECOMPUTED_PROPOSAL_TOPK_TRAIN': 2000, 'TEST': ('asl_val',), 'PROPOSAL_FILES_TEST': (), 'PRECOMPUTED_PROPOSAL_TOPK_TEST': 1000})

In [13]:
class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        return COCOEvaluator("asl_val",)

In [14]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

[05/30 19:19:01 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

Some model parameters or buffers are not found in the checkpoint:
backbone.fpn_lateral2.{bias, weight}
backbone.fpn_lateral3.{bias, weight}
backbone.fpn_lateral4.{bias, weight}
backbone.fpn_lateral5.{bias, weight}
backbone.fpn_output2.{bias, weight}
backbone.fpn_output3.{bias, weight}
backbone.fpn_output4.{bias, weight}
backbone.fpn_output5.{bias, weight}
proposal_generator.rpn_head.anchor_deltas.{bias, weight}
proposal_generator.rpn_head.conv.{bias, weight}
proposal_generator.rpn_head.objectness_logits.{bias, weight}
roi_heads.box_head.fc1.{bias, weight}
roi_heads.box_head.fc2.{bias, weight}
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.deconv.{bias, weight}
roi_heads.mask_head.mask_fcn1.{bias, weight}
roi_heads.mask_head.mask_fcn2.{bias, weight}
roi_heads.mask_head.mask_fcn3.{bias, weight}
roi_heads.mask_head.mask_fcn4.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}
The checkpoint state_dict contains 

[05/30 19:19:01 d2.engine.train_loop]: Starting training from iteration 0


  torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
  torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[05/30 19:19:14 d2.utils.events]:  eta: 2:26:23  iter: 19  total_loss: 4.419  loss_cls: 3.016  loss_box_reg: 0.01288  loss_mask: 0.6932  loss_rpn_cls: 0.6915  loss_rpn_loc: 0.005949    time: 0.5746  last_time: 0.5849  data_time: 0.0262  last_data_time: 0.0170   lr: 4.9953e-06  max_mem: 4480M
[05/30 19:19:25 d2.utils.events]:  eta: 2:25:30  iter: 39  total_loss: 4.31  loss_cls: 2.901  loss_box_reg: 0.01491  loss_mask: 0.6932  loss_rpn_cls: 0.6913  loss_rpn_loc: 0.005903    time: 0.5690  last_time: 0.5944  data_time: 0.0200  last_data_time: 0.0199   lr: 9.9902e-06  max_mem: 4527M
[05/30 19:19:36 d2.utils.events]:  eta: 2:25:20  iter: 59  total_loss: 4.025  loss_cls: 2.624  loss_box_reg: 0.01395  loss_mask: 0.6931  loss_rpn_cls: 0.6899  loss_rpn_loc: 0.007059    time: 0.5691  last_time: 0.5992  data_time: 0.0199  last_data_time: 0.0200   lr: 1.4985e-05  max_mem: 4569M
[05/30 19:19:48 d2.utils.events]:  eta: 2:25:17  iter: 79  total_loss: 3.231  loss_cls: 1.825  loss_box_reg: 0.01189  loss

KeyboardInterrupt: 

In [None]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

[05/30 18:54:16 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output_tempo_final/5/model_final.pth ...


In [None]:
asl_metadata_test = MetadataCatalog.get("asl_val")
dataset_dicts_test = DatasetCatalog.get("asl_val")

[05/30 18:54:16 d2.data.datasets.coco]: Loaded 120 images in COCO format from ../datasets/ASL_mask/annotations/instances_Test.json


In [None]:
from detectron2.utils.visualizer import ColorMode

for d in random.sample(dataset_dicts_test, 20):    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(im[:, :, ::-1],
                   metadata=asl_metadata_test, 
                   scale=0.5, 
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imshow("", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)
cv2.destroyAllWindows()

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread (0x7b5f1a0).
Cannot move to target thread (0x79e1230)

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread (0x7b5f1a0).
Cannot move to target thread (0x79e1230)

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread (0x7b5f1a0).
Cannot move to target thread (0x79e1230)

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread (0x7b5f1a0).
Cannot move to target thread (0x79e1230)

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread (0x7b5f1a0).
Cannot move to target thread (0x79e1230)

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread (0x7b5f1a0).
Cannot move to target thread (0x79e1230)

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread (0x7b5f1a0).
Cannot move to target thread (0x79e1230)

QObject::moveToThread: Current thread (0x79e1230) is not the object's thread

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
evaluator = COCOEvaluator("asl_val", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "asl_val")
print(inference_on_dataset(predictor.model, val_loader, evaluator))
# another equivalent way to evaluate the model is to use `trainer.test`

[05/30 18:54:55 d2.data.datasets.coco]: Loaded 120 images in COCO format from ../datasets/ASL_mask/annotations/instances_Test.json
[05/30 18:54:55 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[05/30 18:54:55 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[05/30 18:54:55 d2.data.common]: Serializing 120 elements to byte tensors and concatenating them all ...
[05/30 18:54:55 d2.data.common]: Serialized dataset takes 0.08 MiB
[05/30 18:54:55 d2.evaluation.evaluator]: Start inference on 120 batches
[05/30 18:54:56 d2.evaluation.evaluator]: Inference done 11/120. Dataloading: 0.0007 s/iter. Inference: 0.0498 s/iter. Eval: 0.0003 s/iter. Total: 0.0507 s/iter. ETA=0:00:05
[05/30 18:55:01 d2.evaluation.evaluator]: Inference done 107/120. Dataloading: 0.0008 s/iter. Inference: 0.0511 s/iter. Eval: 0.0003 s/iter. Total: 0.052