In [1]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
import os
# import some common libraries
import numpy as np
import cv2
import random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.data.catalog import DatasetCatalog

In [2]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("my_dataset_train", {}, "sample/train.json", "dataset/imgs/train/")
register_coco_instances("my_dataset_test", {}, "sample/test.json", "dataset/imgs/test/")
register_coco_instances("my_dataset_val", {}, "sample/val.json", "dataset/imgs/val/")

In [3]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ()

cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001


cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.MAX_ITER = 1600 #adjust up if val mAP is still rising, adjust down if overfit
cfg.SOLVER.STEPS = (1000, 1500)
cfg.SOLVER.GAMMA = 0.05

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

cfg.TEST.EVAL_PERIOD = 500

cfg.OUTPUT_DIR = './output'

In [4]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

class CocoTrainer(DefaultTrainer):

  @classmethod
  def build_evaluator(cls, cfg, dataset_name, output_folder=None):

    if output_folder is None:
        os.makedirs("coco_eval", exist_ok=True)
        output_folder = "coco_eval"

    return COCOEvaluator(dataset_name, cfg, False, output_folder)

In [5]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[11/18 19:06:58 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

[11/18 19:06:58 d2.data.datasets.coco]: Loaded 5552 images in COCO format from sample/train.json
[11/18 19:06:58 d2.data.build]: Removed 0 images with no usable annotations. 5552 images left.
[11/18 19:06:58 d2.data.build]: Distribution of instances among all 1 categories:
|  category  | #instances   |
|:----------:|:-------------|
|    logo    | 6791         |
|            |              |
[11/18 19:06:58 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[11/18 19:06:58 d2.data.build]: Using training sampler TrainingSampler
[11/18 19:06:58 d2.data.common]: Serializing 5552 elements to byte tensors and concatenating them all ...
[11/18 19:06:58 d2.data.common]: Serialized dataset takes 1.42 MiB


Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, we

[11/18 19:06:59 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[11/18 19:07:10 d2.utils.events]:  eta: 0:12:33  iter: 19  total_loss: 1.253  loss_cls: 0.6186  loss_box_reg: 0.5573  loss_rpn_cls: 0.04683  loss_rpn_loc: 0.01433  time: 0.4731  data_time: 0.0117  lr: 1.931e-06  max_mem: 4429M
[11/18 19:07:19 d2.utils.events]:  eta: 0:12:23  iter: 39  total_loss: 1.223  loss_cls: 0.5969  loss_box_reg: 0.5502  loss_rpn_cls: 0.03443  loss_rpn_loc: 0.01762  time: 0.4745  data_time: 0.0044  lr: 2.911e-06  max_mem: 4429M
[11/18 19:07:29 d2.utils.events]:  eta: 0:12:25  iter: 59  total_loss: 1.235  loss_cls: 0.5839  loss_box_reg: 0.5649  loss_rpn_cls: 0.04519  loss_rpn_loc: 0.01506  time: 0.4801  data_time: 0.0044  lr: 3.891e-06  max_mem: 4429M
[11/18 19:07:39 d2.utils.events]:  eta: 0:12:16  iter: 79  total_loss: 1.211  loss_cls: 0.5498  loss_box_reg: 0.5832  loss_rpn_cls: 0.04761  loss_rpn_loc: 0.02044  time: 0.4813  data_time: 0.0039  lr: 4.871e-06  max_mem: 4429M
[11/18 19:07:48 d2.utils.events]:  eta: 0:12:06  iter: 99  total_loss: 1.147  loss_cls: 0.51

[11/18 19:13:13 d2.utils.events]:  eta: 0:07:17  iter: 739  total_loss: 0.9811  loss_cls: 0.2887  loss_box_reg: 0.6772  loss_rpn_cls: 0.01198  loss_rpn_loc: 0.01424  time: 0.5027  data_time: 0.0041  lr: 3.7211e-05  max_mem: 4851M
[11/18 19:13:23 d2.utils.events]:  eta: 0:07:06  iter: 759  total_loss: 1.054  loss_cls: 0.3096  loss_box_reg: 0.6903  loss_rpn_cls: 0.008894  loss_rpn_loc: 0.0127  time: 0.5027  data_time: 0.0045  lr: 3.8191e-05  max_mem: 4851M
[11/18 19:13:33 d2.utils.events]:  eta: 0:06:56  iter: 779  total_loss: 1.015  loss_cls: 0.2775  loss_box_reg: 0.7198  loss_rpn_cls: 0.009701  loss_rpn_loc: 0.0119  time: 0.5025  data_time: 0.0044  lr: 3.9171e-05  max_mem: 4851M
[11/18 19:13:43 d2.utils.events]:  eta: 0:06:45  iter: 799  total_loss: 0.8963  loss_cls: 0.2596  loss_box_reg: 0.6306  loss_rpn_cls: 0.01153  loss_rpn_loc: 0.01568  time: 0.5023  data_time: 0.0052  lr: 4.0151e-05  max_mem: 4851M
[11/18 19:13:52 d2.utils.events]:  eta: 0:06:35  iter: 819  total_loss: 1.033  los

[11/18 19:19:25 d2.utils.events]:  eta: 0:01:01  iter: 1479  total_loss: 0.6961  loss_cls: 0.1952  loss_box_reg: 0.4944  loss_rpn_cls: 0.008047  loss_rpn_loc: 0.01033  time: 0.5022  data_time: 0.0044  lr: 5e-05  max_mem: 4851M
[11/18 19:19:35 d2.utils.events]:  eta: 0:00:51  iter: 1499  total_loss: 0.7522  loss_cls: 0.2409  loss_box_reg: 0.4895  loss_rpn_cls: 0.0126  loss_rpn_loc: 0.01992  time: 0.5024  data_time: 0.0041  lr: 5e-05  max_mem: 4851M
[11/18 19:19:45 d2.utils.events]:  eta: 0:00:40  iter: 1519  total_loss: 0.756  loss_cls: 0.189  loss_box_reg: 0.5144  loss_rpn_cls: 0.01023  loss_rpn_loc: 0.008463  time: 0.5023  data_time: 0.0049  lr: 2.5e-06  max_mem: 4851M
[11/18 19:19:55 d2.utils.events]:  eta: 0:00:30  iter: 1539  total_loss: 0.7093  loss_cls: 0.2211  loss_box_reg: 0.4722  loss_rpn_cls: 0.005728  loss_rpn_loc: 0.01275  time: 0.5023  data_time: 0.0043  lr: 2.5e-06  max_mem: 4851M
[11/18 19:20:05 d2.utils.events]:  eta: 0:00:20  iter: 1559  total_loss: 0.7477  loss_cls: 0

In [6]:
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.85
predictor = DefaultPredictor(cfg)
evaluator = COCOEvaluator("my_dataset_test", cfg, False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "my_dataset_test")
inference_on_dataset(trainer.model, val_loader, evaluator)

[11/18 19:20:44 d2.evaluation.coco_evaluation]: Fast COCO eval is not built. Falling back to official COCO eval.
[11/18 19:20:44 d2.data.datasets.coco]: Loaded 1190 images in COCO format from sample/test.json
[11/18 19:20:44 d2.data.build]: Distribution of instances among all 1 categories:
|  category  | #instances   |
|:----------:|:-------------|
|    logo    | 1467         |
|            |              |
[11/18 19:20:44 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[11/18 19:20:44 d2.data.common]: Serializing 1190 elements to byte tensors and concatenating them all ...
[11/18 19:20:44 d2.data.common]: Serialized dataset takes 0.30 MiB
[11/18 19:20:44 d2.evaluation.evaluator]: Start inference on 1190 batches
[11/18 19:20:45 d2.evaluation.evaluator]: Inference done 11/1190. Dataloading: 0.0005 s/iter. Inference: 0.0707 s/iter. Eval: 0.0001 s/iter. Total: 0.0713 s/iter. 

OrderedDict([('bbox',
              {'AP': 20.482318264943,
               'AP50': 44.947458660991764,
               'AP75': 15.702995161985669,
               'APs': 0.9730248233055633,
               'APm': 9.99125823095685,
               'APl': 25.321799651191967})])