In [None]:
# Train pokemon data set

## Data loader

In [1]:
from detectron2.data.datasets import register_coco_instances, load_coco_json
from detectron2.data import DatasetCatalog, MetadataCatalog

DATASET_DIR = "../../datasets/pokemon/v5/"
COCO_JSON_FILE_NAME = "_annotations.coco.json"

# Each split has its own folder; the COCO annotation file sits next to the images.
TRAIN_JSON_PATH = DATASET_DIR + "train/" + COCO_JSON_FILE_NAME
TRAIN_IMG_PATH = DATASET_DIR + "train"

VALID_JSON_PATH = DATASET_DIR + "valid/" + COCO_JSON_FILE_NAME
VALID_IMG_PATH = DATASET_DIR + "valid"

TEST_JSON_PATH = DATASET_DIR + "test/" + COCO_JSON_FILE_NAME
TEST_IMG_PATH = DATASET_DIR + "test"


def register_pokemon_split(name, json_path, img_path):
    """Register one COCO-format split with detectron2 and return its metadata.

    The cell can be re-run safely: DatasetCatalog.register asserts when a name
    is already registered, so an existing entry is removed first.

    Args:
        name: dataset name used in DatasetCatalog / MetadataCatalog.
        json_path: path to the split's COCO annotation file.
        img_path: directory that contains the split's images.

    Returns:
        The metadata object registered under ``name``.
    """
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    # Arguments are captured by the closure; loading happens lazily on first use.
    DatasetCatalog.register(name, lambda: load_coco_json(json_path, img_path, name))
    return MetadataCatalog.get(name).set(
        thing_classes=["gengar"], json_file=json_path, image_root=img_path
    )


# Only train and valid are registered; the test paths above are defined but unused here.
register_pokemon_split("pokemon_train", TRAIN_JSON_PATH, TRAIN_IMG_PATH)
register_pokemon_split("pokemon_valid", VALID_JSON_PATH, VALID_IMG_PATH)

namespace(name='pokemon_valid',
          thing_classes=['gengar'],
          json_file='../../datasets/pokemon/v5/valid/_annotations.coco.json',
          image_root='../../datasets/pokemon/v5/valid')

## Verify dataset

In [2]:
import random,time
import cv2
from detectron2.data.datasets import load_coco_json
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# Load the training annotations and draw a few random samples to eyeball the labels.
dataset_train_dicts = load_coco_json(TRAIN_JSON_PATH, TRAIN_IMG_PATH, "pokemon_train")
pokemon_metadata = MetadataCatalog.get("pokemon_train")

# random.sample raises ValueError if asked for more items than exist, so cap at the dataset size.
for d in random.sample(dataset_train_dicts, min(3, len(dataset_train_dicts))):
    img = cv2.imread(d["file_name"])
    # [:, :, ::-1] reverses the channel axis: OpenCV works in BGR, the Visualizer in RGB.
    visualizer = Visualizer(img[:, :, ::-1], metadata=pokemon_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2.imshow(str(round(time.time()*1000))+'.jpg', out.get_image()[:, :, ::-1])
    key = cv2.waitKey(0)
    # Close the window before deciding whether to stop, so ESC does not leave it open.
    cv2.destroyAllWindows()
    if key == 27:
        break  # esc to quit

## Config

In [12]:
from detectron2 import model_zoo
from detectron2.config import get_cfg

# Start from the COCO Mask R-CNN R50-FPN 3x baseline and override what this dataset needs.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("pokemon_train",)  # tuple of dataset names; without the trailing comma this is just a string
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 600    # a short schedule for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (gengar). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.


## Train

In [13]:
from detectron2.engine import DefaultTrainer
import os

# The output directory must exist before the trainer is created.
os.makedirs(name=cfg.OUTPUT_DIR, exist_ok=True)

# resume=False: do not resume from a previous run (the log shows training starting at iteration 0).
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[32m[08/09 14:25:11 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[08/09 14:25:11 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[08/09 14:25:14 d2.utils.events]: [0m eta: 0:02:54  iter: 19  total_loss: 1.813  loss_cls: 0.6604  loss_box_reg: 0.4634  loss_mask: 0.6908  loss_rpn_cls: 0.003779  loss_rpn_loc: 0.009138    time: 0.1754  last_time: 0.1589  data_time: 0.0059  last_data_time: 0.0023   lr: 4.9953e-06  max_mem: 2262M
[32m[08/09 14:25:18 d2.utils.events]: [0m eta: 0:02:50  iter: 39  total_loss: 1.788  loss_cls: 0.6071  loss_box_reg: 0.4377  loss_mask: 0.6787  loss_rpn_cls: 0.001399  loss_rpn_loc: 0.004671    time: 0.1746  last_time: 0.1472  data_time: 0.0023  last_data_time: 0.0022   lr: 9.9902e-06  max_mem: 2262M
[32m[08/09 14:25:21 d2.utils.events]: [0m eta: 0:02:48  iter: 59  total_loss: 1.684  loss_cls: 0.507  loss_box_reg: 0.4813  loss_mask: 0.656  loss_rpn_cls: 0.004437  loss_rpn_loc: 0.00902    time: 0.1764  last_time: 0.1884  data_time: 0.0021  last_data_time: 0.0020   lr: 1.4985e-05  max_mem: 2262M
[32m[08

## Inference

In [14]:
from detectron2.engine import DefaultPredictor
import os

# The predictor reuses the training config; only the test-time settings below are overridden.
# Note that cfg is modified in place, so later cells see these values too.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set a custom testing threshold
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
predictor = DefaultPredictor(cfg)

[32m[08/09 14:28:50 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./output/model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


## Predict

In [15]:
from detectron2.utils.visualizer import ColorMode
from detectron2.utils.visualizer import Visualizer
import time, cv2

# Load the validation split under its registered name, using the paths from the data-loader cell.
dataset_dicts = load_coco_json(VALID_JSON_PATH, VALID_IMG_PATH, "pokemon_valid")

# Show predictions for up to three random validation images.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(im[:, :, ::-1],
                   metadata=pokemon_metadata,
                   scale=0.5,
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    image_name = str(round(time.time()*1000))+'.jpg'
    cv2.imshow(image_name, out.get_image()[:, :, ::-1])
    key = cv2.waitKey(0)
    # Close the window before deciding whether to stop, so ESC does not leave it open.
    cv2.destroyAllWindows()
    if key == 27:
        break  # esc to quit


[32m[08/09 14:28:52 d2.data.datasets.coco]: [0mLoaded 10 images in COCO format from ../../datasets/pokemon/v5/valid/_annotations.coco.json


## Evaluate

In [5]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# This cell needs `cfg` and `predictor` from the Config and Inference cells; running it
# before them fails with NameError, so run the notebook top to bottom.
# Evaluation files go to the same directory the trainer wrote to.
evaluator = COCOEvaluator("pokemon_valid", output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "pokemon_valid")
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_results)
# another equivalent way to evaluate the model is to use `trainer.test`

[32m[08/09 14:19:37 d2.data.datasets.coco]: [0mLoaded 10 images in COCO format from ../../datasets/pokemon/v5/valid/_annotations.coco.json
[32m[08/09 14:19:37 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   gengar   | 10           |
|            |              |[0m
[32m[08/09 14:19:37 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[08/09 14:19:37 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[08/09 14:19:37 d2.data.common]: [0mSerializing 10 elements to byte tensors and concatenating them all ...
[32m[08/09 14:19:37 d2.data.common]: [0mSerialized dataset takes 0.01 MiB


NameError: name 'predictor' is not defined