In [1]:
import torch, detectron2
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

  from .autonotebook import tqdm as notebook_tqdm


torch:  1.14 ; cuda:  1.14.0.dev20221014
detectron2: 0.6


In [2]:
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import os
from detectron2 import model_zoo
from detectron2.data import MetadataCatalog, DatasetCatalog, DatasetMapper, build_detection_train_loader, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import COCOEvaluator
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.engine.hooks import HookBase
from detectron2.utils.logger import log_every_n_seconds
from detectron2.checkpoint import DetectionCheckpointer, Checkpointer
import detectron2.utils.comm as comm
from detectron2.utils.visualizer import Visualizer
from detectron2.evaluation.fast_eval_api import COCOeval_opt
import glob
import cv2
from matplotlib import pyplot as plt
import torch
import time
import datetime
import logging

In [5]:
project_path = "../"
register_coco_instances("train", {}, os.path.join(project_path, "data/annotations/train_cats_3411.json"), os.path.join(project_path, "data/train"))
register_coco_instances("val", {}, os.path.join(project_path, "data/annotations/val_cats_3411.json"), os.path.join(project_path, "data/val"))
register_coco_instances("test", {}, os.path.join(project_path, "data/annotations/test_cats_3411.json"), os.path.join(project_path, "data/test"))

In [6]:
cfg = get_cfg()
# Use pretrained model parameters of mask_rcnn_R_50_FPN_3x.yaml
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("train",)
cfg.DATASETS.TEST = ("val",)
cfg.DATALOADER.NUM_WORKERS = 2 # Dataloader workers
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") # Model weights
cfg.SOLVER.IMS_PER_BATCH = 5 # Images per batch
cfg.SOLVER.BASE_LR = 0.001 # Base learning rate
cfg.SOLVER.MAX_ITER = 10000 # Max iteration
cfg.SOLVER.STEPS = (2000,3000,4000,5000,) # Steps on decaying lr
cfg.SOLVER.NUM_DECAYS = 4 # Total lr decay
cfg.SOLVER.GAMMA = 0.2 # Decay to gamma times previous lr
cfg.SOLVER.CHECKPOINT_PERIOD = 1000 # Save checkpoint every 1000 iterations
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 14
cfg.MODEL.MASK_ON = True # Mask
cfg.OUTPUT_DIR = os.path.join(project_path, "notebooks/output/")
cfg.TEST.EVAL_PERIOD = 500
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [15]:
print(cfg)

CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 2
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ('val',)
  TRAIN: ('train',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cpu
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: ['res2', 'res3', 'res4', 'res5

In [8]:
from detectron2.modeling import build_model

In [9]:
model = build_model(cfg)

In [10]:
print(model)

GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res2): Sequential(
        (0): BottleneckBlock

In [12]:
print(type(model.roi_heads))

<class 'detectron2.modeling.roi_heads.roi_heads.StandardROIHeads'>


In [14]:
from detectron2.modeling import ROI_HEADS_REGISTRY, roi_heads, ShapeSpec
from torch import nn

In [None]:
@ROI_HEADS_REGISTRY.register()
class ALScorePredHead(roi_heads.roi_heads.StandardROIHeads):
    def __init__(
            self,
            *,
            box_scorer: nn.Module,
            mask_scorer: nn.Module,
            **kwargs,
            ):
        super().__init__(**kwargs)
        self.box_scorer = box_scorer
        self.mask_scorer = mask_scorer