In [1]:
# import some common libraries
import numpy as np
import cv2
import random

import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

# Detectron
import detectron2

# Helper Library
import os
import json
from PIL import Image
import matplotlib
from matplotlib import pyplot as plt

1.7.1+cu110 True


## Helper Function

#### Showing Image

In [2]:
## Helper
# Logging and visualization utilities from detectron2.
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer
# Initialise detectron2's logger up front so that later detectron2 calls
# (training, evaluation) can emit their log lines into the notebook output.
setup_logger()

def cv2_imshow(img):
    %matplotlib inline
    img = img[:,:,[2,1,0]]
    img = Image.fromarray(img, mode="RGB")
    plt.figure(figsize=(20, 20))
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    %matplotlib agg
    
def cv2_imshow_small(img):
    %matplotlib inline
    img = img[:,:,[2,1,0]]
    img = Image.fromarray(img, mode="RGB")
    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    %matplotlib agg

#### Finds Model Directories

In [3]:
def find_all_model_in_folder(folder_dir):
    """Print the path of every model checkpoint found below ``folder_dir``.

    The directory tree is walked recursively. A file counts as a checkpoint
    when its name starts with "model" and ends with ".pth". Each match is
    printed on its own line with backslashes replaced by forward slashes.
    Nothing is returned.
    """
    checkpoint_paths = [
        os.path.join(parent, filename).replace("\\", "/")
        for parent, _subdirs, filenames in os.walk(folder_dir)
        for filename in filenames
        if filename.startswith("model") and filename.endswith(".pth")
    ]
    for checkpoint_path in checkpoint_paths:
        print(checkpoint_path)

## Initialization

#### Define Constant Variable

In [4]:
# Root folder of the dataset; each split lives in its own sub-folder.
DATASET_PATH = "dataset/e-ktp/"
# Annotation file location, relative to a split folder.
annotation_dir = "annotations/instances_default.json"
train_dataset_path = f"{DATASET_PATH}train/"
val_dataset_path = f"{DATASET_PATH}val/"
# NOTE: despite its name, this string is prepended to the split name.
catalog_suffix = "ektp_"
train_catalog_name, val_catalog_name = (
    f"{catalog_suffix}{split_name}" for split_name in ("train", "val")
)

#### Registering Dataset

In [5]:
## Dataset
from detectron2 import model_zoo
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.structures import BoxMode

# Register both splits as COCO-format datasets.
# DatasetCatalog refuses to register the same name twice, so re-running this
# cell on a live kernel used to raise; names that are already registered are
# skipped to keep the cell re-runnable.
for _catalog_name, _split_path in (
    (train_catalog_name, train_dataset_path),  # Train dataset
    (val_catalog_name, val_dataset_path),      # Validation dataset
):
    if _catalog_name not in DatasetCatalog.list():
        register_coco_instances(
            _catalog_name,
            {},
            _split_path + annotation_dir,
            _split_path + 'images',
        )

train_dataset_metadata = MetadataCatalog.get(train_catalog_name)

#### Visualizing the Train Dataset

In [None]:
dataset_dicts = DatasetCatalog.get(train_catalog_name)
# Preview up to three random training images with their annotations drawn on.
# The sample size is clamped because random.sample raises ValueError when asked
# for more items than the dataset contains.
preview_count = min(3, len(dataset_dicts))
for d in random.sample(dataset_dicts, preview_count):
    img = cv2.imread(d["file_name"])
    # cv2 loads images as BGR; reverse the channel axis for the Visualizer.
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_dataset_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    # Reverse the channels again because cv2_imshow expects BGR input.
    cv2_imshow(vis.get_image()[:, :, ::-1])

## Image Augmentation

In [6]:
def augmentation_mapper(rotation_angle_range=(-10.0, 10.0)):
    """Build the list of random training-time augmentations.

    Fixes relative to the previous version, which could not run:
      * ``T`` was never imported; it is now imported locally.
      * RandomBrightness, RandomCrop, RandomLighting and RandomRotation do not
        accept a ``prob`` argument. They are wrapped in ``T.RandomApply``,
        which applies the wrapped augmentation with the given probability.
      * The crop probability was written as ``-.25``; it is now ``0.25``.
      * RandomRotation requires an ``angle`` and takes an OpenCV interpolation
        flag, not a PIL constant.

    Args:
        rotation_angle_range: ``(min, max)`` rotation in degrees, sampled
            uniformly. The original code gave no angle at all, so this default
            is a placeholder. TODO: confirm a sensible range for this dataset.

    Returns:
        A ``T.AugmentationList`` holding the configured augmentations.
    """
    from detectron2.data import transforms as T

    augs = T.AugmentationList([
        T.RandomApply(T.RandomBrightness(0.9, 1.1), prob=0.25),
        # RandomFlip has its own probability parameter, so no wrapper is needed.
        T.RandomFlip(prob=0.75),
        T.RandomApply(T.RandomCrop("absolute", (640, 640)), prob=0.25),
        T.RandomApply(T.RandomLighting(scale=0.1), prob=0.25),
        T.RandomApply(
            T.RandomRotation(angle=list(rotation_angle_range), interp=cv2.INTER_LINEAR),
            prob=0.5,
        ),
    ])
    return augs

## Training

#### Default Constant Model Zoo and detectron2 configs

In [7]:
# Faster R-CNN, R50-C4 backbone, 1x schedule: config file and pretrained weights
r50_c4_1x_model_zoo = "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml"
r50_c4_1x_model_weights = (
    "detectron2://COCO-Detection/faster_rcnn_R_50_C4_1x/"
    "137257644/model_final_721ade.pkl"
)

# Faster R-CNN, R50-FPN backbone, 3x schedule: config file and pretrained weights
r50_fpn_3x_model_zoo = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
r50_fpn_3x_model_weights = (
    "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/"
    "137849458/model_final_280758.pkl"
)

#### Initialize training variables

In [8]:
# Model to fine-tune: swap both names together to switch architecture.
model_zoo_name, model_weights_name = r50_c4_1x_model_zoo, r50_c4_1x_model_weights
# Registered dataset names used for training and validation.
train_dataset_name, val_dataset_name = train_catalog_name, val_catalog_name

#### Training Configurations

In [9]:
## Training
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer
## Validation
from detectron2.engine import DefaultPredictor

# Expose only GPU 0 to CUDA.
# NOTE(review): torch was already imported and queried for CUDA availability
# in the first cell -- confirm this setting still takes effect at this point.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Start from detectron2's defaults, then overlay the chosen model-zoo config.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(model_zoo_name))
cfg.MODEL.WEIGHTS = model_weights_name  # initialise training from the model-zoo checkpoint

cfg.DATASETS.TRAIN = (train_dataset_name,) # trailing comma makes this a 1-tuple rather than a plain string
cfg.DATASETS.TEST = (val_dataset_name,)

# No data-loader worker processes.
cfg.DATALOADER.NUM_WORKERS = 0

# Small batch: the notes below say GPU memory cannot handle a larger one.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # base learning rate; tune for your dataset
cfg.SOLVER.MAX_ITER = 3000    # total training iterations; you may need to train longer for a larger, practical dataset

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 100   # faster, and good enough for this toy dataset (default: 512)
# Single foreground class (the e-KTP card).
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

Notes:
* The batch size is kept low because my GPU does not have enough memory for a larger one :)
* 

#### Train goes here

In [10]:
from datetime import datetime

# Each run writes to its own folder:
# ./results/faster-rcnn/<timestamp>/<model zoo config name>
run_timestamp = datetime.now().strftime("%m.%d.%Y, %H;%M;%S")
cfg.OUTPUT_DIR = f"./results/faster-rcnn/{run_timestamp}/{model_zoo_name}"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = DefaultTrainer(cfg)
# resume=False: do not resume from a previous run's checkpoint.
trainer.resume_or_load(resume=False)
trainer.train()

[32m[05/27 13:09:28 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): ResNet(
    (stem): BasicStem(
      (conv1): Conv2d(
        3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
        (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
      )
    )
    (res2): Sequential(
      (0): BottleneckBlock(
        (shortcut): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
        )
        (conv1): Conv2d(
          64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv2): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv3): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 2048) in the checkpoint but (2, 2048) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 2048) in the checkpoint but (4, 2048) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.


[32m[05/27 13:09:28 d2.engine.train_loop]: [0mStarting training from iteration 0


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:882.)
  num_fg = fg_inds.nonzero().numel()


[32m[05/27 13:09:46 d2.utils.events]: [0m eta: 0:35:36  iter: 19  total_loss: 1.625  loss_cls: 0.6868  loss_box_reg: 0.6945  loss_rpn_cls: 0.183  loss_rpn_loc: 0.08228  time: 0.7188  data_time: 0.3510  lr: 4.9953e-06  max_mem: 4286M
[32m[05/27 13:09:59 d2.utils.events]: [0m eta: 0:35:02  iter: 39  total_loss: 1.531  loss_cls: 0.6342  loss_box_reg: 0.6315  loss_rpn_cls: 0.1325  loss_rpn_loc: 0.08277  time: 0.7040  data_time: 0.3477  lr: 9.9902e-06  max_mem: 4286M
[32m[05/27 13:10:14 d2.utils.events]: [0m eta: 0:34:48  iter: 59  total_loss: 1.545  loss_cls: 0.5735  loss_box_reg: 0.6525  loss_rpn_cls: 0.1855  loss_rpn_loc: 0.1001  time: 0.7145  data_time: 0.3743  lr: 1.4985e-05  max_mem: 4286M
[32m[05/27 13:10:28 d2.utils.events]: [0m eta: 0:34:34  iter: 79  total_loss: 1.476  loss_cls: 0.4869  loss_box_reg: 0.689  loss_rpn_cls: 0.186  loss_rpn_loc: 0.07523  time: 0.7029  data_time: 0.3239  lr: 1.998e-05  max_mem: 4286M
[32m[05/27 13:10:44 d2.utils.events]: [0m eta: 0:33:43  ite

## Evaluation

#### Image Augmentation for Validation Dataset

In [28]:
# TODO: add validation-time image augmentation here, if any is needed

#### Visualize Model Results

In [None]:
from detectron2.utils.visualizer import ColorMode

# Use the final weights generated after successful training for inference
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8  # testing threshold: discard low-confidence detections
# Pass the validation dataset. The trailing comma is required: without it the
# parentheses are only grouping and TEST becomes a plain string, so
# cfg.DATASETS.TEST[0] would be the single character "e", not the dataset name.
cfg.DATASETS.TEST = (val_catalog_name,)

predictor = DefaultPredictor(cfg)

val_dataset_dicts = DatasetCatalog.get(val_catalog_name)
val_dataset_metadata = MetadataCatalog.get(val_catalog_name)
# Clamp the sample size: random.sample raises ValueError when asked for more
# items than the dataset contains.
for d in random.sample(val_dataset_dicts, min(3, len(val_dataset_dicts))):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    # cv2 loads images as BGR; reverse the channel axis for the Visualizer.
    v = Visualizer(im[:, :, ::-1],
                   metadata=val_dataset_metadata,
                   scale=0.8,
                   instance_mode=ColorMode.IMAGE
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))  # move predictions to the CPU before drawing
    # Reverse the channels again because cv2_imshow expects BGR input.
    cv2_imshow(v.get_image()[:, :, ::-1])

#### Anomaly
The model still detects any rectangular object that resembles an e-KTP as an e-KTP.

In [None]:
# Re-run the predictor on every validation image whose path contains
# "78_selfie.jpg" and draw the predicted instances.
for data in val_dataset_dicts:
    if "78_selfie.jpg" not in data["file_name"]:
        continue
    im = cv2.imread(data["file_name"])
    outputs = predictor(im)
    cpu_instances = outputs["instances"].to("cpu")  # move predictions off the GPU before drawing
    v = Visualizer(
        im[:, :, ::-1],
        metadata=val_dataset_metadata,
        scale=0.8,
        instance_mode=ColorMode.IMAGE,
    )
    v = v.draw_instance_predictions(cpu_instances)
    cv2_imshow(v.get_image()[:, :, ::-1])

#### Results in Average Precision

In [13]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, LVISEvaluator
from detectron2.data import build_detection_test_loader

# COCO-style evaluator for the validation split; output goes to cfg.OUTPUT_DIR.
# NOTE(review): the positional `cfg, False` arguments presumably map to the
# config and the `distributed` flag -- confirm against the installed detectron2
# version, since this constructor's signature has changed between releases.
evaluator = COCOEvaluator(val_catalog_name, cfg, False, output_dir=cfg.OUTPUT_DIR)
# Test-time data loader over the validation split.
val_loader = build_detection_test_loader(cfg, val_catalog_name)

[32m[05/27 13:51:49 d2.data.datasets.coco]: [0mLoaded 46 images in COCO format from dataset/e-ktp/val/annotations/instances_default.json
[32m[05/27 13:51:49 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[05/27 13:51:49 d2.data.common]: [0mSerializing 46 elements to byte tensors and concatenating them all ...
[32m[05/27 13:51:49 d2.data.common]: [0mSerialized dataset takes 0.01 MiB


In [14]:
# Evaluate the model held by `trainer` on the validation loader; the returned
# metrics are shown as the cell output. This uses the in-memory trained model,
# so the training cell must have been run in this kernel session.
inference_on_dataset(trainer.model, val_loader, evaluator)
# another equivalent way is to use trainer.test

[32m[05/27 13:51:50 d2.evaluation.evaluator]: [0mStart inference on 46 images
[32m[05/27 13:51:56 d2.evaluation.evaluator]: [0mInference done 11/46. 0.2245 s / img. ETA=0:00:13
[32m[05/27 13:52:01 d2.evaluation.evaluator]: [0mInference done 23/46. 0.1898 s / img. ETA=0:00:09
[32m[05/27 13:52:06 d2.evaluation.evaluator]: [0mInference done 42/46. 0.1995 s / img. ETA=0:00:01
[32m[05/27 13:52:07 d2.evaluation.evaluator]: [0mTotal inference time: 0:00:13.869114 (0.338271 s / img per device, on 1 devices)
[32m[05/27 13:52:07 d2.evaluation.evaluator]: [0mTotal inference pure compute time: 0:00:08 (0.204070 s / img per device, on 1 devices)
[32m[05/27 13:52:07 d2.evaluation.coco_evaluation]: [0mPreparing results for COCO format ...
[32m[05/27 13:52:07 d2.evaluation.coco_evaluation]: [0mSaving results to ./results/faster-rcnn/05.27.2021, 13;09;26/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml\coco_instances_results.json
[32m[05/27 13:52:07 d2.evaluation.coco_evaluation]: [0mEvalua

OrderedDict([('bbox',
              {'AP': 94.63930412467704,
               'AP50': 99.57868127238257,
               'AP75': 99.57868127238257,
               'APs': nan,
               'APm': nan,
               'APl': 94.63930412467704})])