In [1]:
# # VOC format is required
# voc_root/
# --Annotations/*.xml
# --ImageSets/Main/[train.txt, valid.txt]
# --JPEGImages/*.jpg

## Libraries

In [2]:
from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
import random
from matplotlib import pyplot as plt
import cv2
from detectron2.utils.visualizer import Visualizer
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2 import model_zoo
from detectron2.data.datasets import register_pascal_voc
import random
from pathlib import Path
from tqdm import tqdm
from detectron2.structures import Boxes

from detectron2.evaluation import COCOEvaluator
from detectron2.config import get_cfg
import os
from detectron2.structures import BoxMode
import numpy as np


class Trainer(DefaultTrainer):
    """DefaultTrainer variant whose evaluation step uses a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """
        Build the evaluator for `dataset_name`.

        When no `output_folder` is given, results go to
        `<cfg.OUTPUT_DIR>/inference`.
        """
        target_dir = (
            output_folder
            if output_folder is not None
            else os.path.join(cfg.OUTPUT_DIR, "inference")
        )
        return COCOEvaluator(dataset_name, cfg, False, output_dir=target_dir)
    
def _convert_boxes(boxes):
        """
        Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension.
        """
        if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes):
            return boxes.tensor.numpy()
        else:
            return np.asarray(boxes)

def get_train_dict(preds, train_dicts):
    """
    Turn predictions into dataset records and append existing records after them.

    preds: list of dicts with a 'file_name' and an 'outputs' entry; 'outputs'
        must provide `image_size`, `pred_boxes` and `len()`.
    train_dicts: iterable of already-built records, appended unchanged.

    Returns a new list: one record per prediction (every box gets
    category_id 0 and XYXY_ABS mode), followed by `train_dicts`.
    """

    def _to_record(pred):
        # One record per predicted image.
        instances = pred['outputs']
        img_h, img_w = instances.image_size
        boxes = [
            {
                'category_id': 0,
                'bbox': _convert_boxes(instances.pred_boxes[i])[0],
                'bbox_mode': BoxMode.XYXY_ABS,
            }
            for i in range(len(instances))
        ]
        return {
            'file_name': pred['file_name'],
            'image_id': str(Path(pred['file_name']).stem),
            'height': img_h,
            'width': img_w,
            'annotations': boxes,
        }

    return [_to_record(pred) for pred in preds] + list(train_dicts)

def _collect_detections(path_file, cfg, keep=None):
    """
    Run a DefaultPredictor over every file of a folder and keep the hits.

    path_file: folder holding the RAW images
    cfg: model configuration handed to DefaultPredictor
    keep: optional callable taking a file name; files for which it returns a
        falsy value are skipped. None means every file is processed.

    Returns a list of {'file_name', 'outputs'} dicts, one per image with at
    least one detection; 'outputs' holds the instances moved to the CPU.
    """
    predictor = DefaultPredictor(cfg)
    path_file = Path(path_file)

    # List the folder once so the loop and the summary count use the same set.
    file_names = os.listdir(path_file)

    _labeled = []
    for f in tqdm(file_names):
        if keep is not None and not keep(f):
            continue

        # Load image
        path_f = os.path.join(path_file, f)
        image = cv2.imread(path_f)
        if image is None:
            # cv2.imread returns None for files it cannot decode (sub-folders,
            # non-image files); skip them instead of crashing the predictor.
            continue

        # Prediction
        outputs = predictor(image)

        # Keep only the images with at least one detection
        if len(outputs["instances"]) > 0:
            _labeled.append({'file_name': path_f,
                             'outputs': outputs['instances'].to('cpu')})

    print("{}/{} images with class founded".format(len(_labeled),
                                                  len(file_names)))

    return _labeled


def inference_pipeline(path_file, cfg):
    """
    Path_file: location of the RAW images
    cfg: model

    return list of predictions (one dict per image with detections)
    """
    return _collect_detections(path_file, cfg)


def inference_pipeline2(path_file, cfg):
    """
    Same as inference_pipeline, restricted to files whose name contains
    'vianoleo'.

    Path_file: location of the RAW images
    cfg: model

    return list of predictions (one dict per image with detections)
    """
    return _collect_detections(path_file, cfg, keep=lambda f: 'vianoleo' in f)

    
def custom_register_datas(voc_root, name, mode, CLASS_NAMES = ['Deformation']):

    """
    Register a Pascal-VOC split and return its records.

    voc_root = root to the voc directory
    name = name of the dataset
    mode = type the name of the txt. Example 'train' reference to voc_root/ImageSets/Main/train.txt
    CLASS_NAMES = list with the classes. Example: ['A', 'B']

    If `name` is already registered, the previous registration is removed
    and replaced.

    This function returns the registered datas (the result of
    DatasetCatalog.get(name)).
    """

    # Explicit membership test instead of a bare `except:`: the old handler
    # treated every failure (missing files, bad XML, ...) as "already
    # registered" and hid the real error behind a second attempt.
    if name in DatasetCatalog.list():
        print(mode, "Training datas already registered, overwritting")
        DatasetCatalog.remove(name)

    register_pascal_voc(name, voc_root, mode, None, class_names=CLASS_NAMES)

    # Load once and reuse for both the summary line and the return value.
    datas = DatasetCatalog.get(name)
    print(mode, "Datas ready ", len(datas))

    return datas



    

## Data

In [3]:
# Register the 'train' and 'valid' splits of the same VOC folder and keep their records.
voc_root = '../DATAS/voc.data'
train_dict = custom_register_datas(voc_root, name='train_ds', mode='train', CLASS_NAMES=['Deformation'])
valid_dict = custom_register_datas(voc_root, name='valid_ds', mode='valid', CLASS_NAMES=['Deformation'])

train Datas ready  580
valid Datas ready  142


## Network

In [4]:
# Model-zoo config used as the starting point; the same string doubles as the output folder name.
net  = 'faster_rcnn_R_101_FPN_3x.yaml'
zoo_entry = "COCO-Detection/" + net

cfg = get_cfg()
cfg.OUTPUT_DIR = net  # folder where results are kept

# Architecture from the zoo config, weights initialised from the COCO checkpoint.
cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)

cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # number of classes

cfg.DATASETS.TRAIN = ("train_ds",)  # training split
cfg.DATASETS.TEST = ("valid_ds",)  # evaluation split

## Training

In [None]:
# Solver / evaluation cadence: evaluate and checkpoint every 1000 iterations.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.TEST.EVAL_PERIOD = 1000
cfg.SOLVER.CHECKPOINT_PERIOD = 1000

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# NOTE(review): cfg.SOLVER.MAX_ITER is not set in this notebook before this
# point, so the run length comes from the merged config - confirm it is intended.
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)  # start from cfg.MODEL.WEIGHTS, not from a previous run
trainer.train()

[32m[11/26 17:20:48 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[11/26 17:20:48 d2.data.build]: [0mRemoved 0 images with no usable annotations. 580 images left.
[32m[11/26 17:20:48 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category   | #instances   |
|:-----------:|:-------------|
| Deformation | 648          |
|             |              |[0m
[32m[11/26 17:20:48 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[11/26 17:20:48 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[11/26 17:20:48 d2.data.common]: [0mSerializing 580 elements to byte tensors and concatenating them all ...
[32m[11/26 17:20:48 d2.data.common]: [0mSerialized dataset takes 0.25 MiB


Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.


[32m[11/26 17:20:48 d2.engine.train_loop]: [0mStarting training from iteration 0


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  num_fg = fg_inds.nonzero().numel()


[32m[11/26 17:21:03 d2.utils.events]: [0m eta: 2 days, 4:16:07  iter: 19  total_loss: 0.4504  loss_cls: 0.2063  loss_box_reg: 0.0002737  loss_rpn_cls: 0.1887  loss_rpn_loc: 0.01262  time: 0.7218  data_time: 0.0126  lr: 0.00039962  max_mem: 3676M
[32m[11/26 17:21:16 d2.utils.events]: [0m eta: 2 days, 3:52:17  iter: 39  total_loss: 0.1889  loss_cls: 0.08236  loss_box_reg: 0.06824  loss_rpn_cls: 0.04541  loss_rpn_loc: 0.006632  time: 0.6986  data_time: 0.0030  lr: 0.00079922  max_mem: 3676M
[32m[11/26 17:21:30 d2.utils.events]: [0m eta: 2 days, 3:30:49  iter: 59  total_loss: 0.1974  loss_cls: 0.09211  loss_box_reg: 0.08027  loss_rpn_cls: 0.01898  loss_rpn_loc: 0.004273  time: 0.6918  data_time: 0.0032  lr: 0.0011988  max_mem: 3676M
[32m[11/26 17:21:42 d2.utils.events]: [0m eta: 2 days, 3:16:27  iter: 79  total_loss: 0.2976  loss_cls: 0.1149  loss_box_reg: 0.1643  loss_rpn_cls: 0.007758  loss_rpn_loc: 0.005469  time: 0.6730  data_time: 0.0031  lr: 0.0015984  max_mem: 3676M
[32m[11

In [None]:
# Load the TensorBoard notebook extension and open it on the training output folder
# (the --logdir value is the same string the network cell assigns to cfg.OUTPUT_DIR).
%load_ext tensorboard
%tensorboard --logdir faster_rcnn_R_101_FPN_3x.yaml

In [None]:
# NOTE(review): shell escape with no command - looks like a leftover scratch cell; confirm it can be removed.
!

## Inference - 0.75 / 0.8

In [None]:
# Pseudo-labelling pass: checkpoint from iteration 1799, detections kept above 0.75 confidence.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.75
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0001799.pth")

# Predict on the unlabelled images
preds = inference_pipeline2('../DATAS/voc.data/Test', cfg)

In [None]:
# Pseudo-labelling pass: checkpoint from iteration 1999, detections kept above 0.8 confidence.
# This overwrites `preds` from the previous inference cell.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0001999.pth")

# Predict on the unlabelled images
preds = inference_pipeline2('../DATAS/voc.data/Test', cfg)

In [None]:
# Register datas (needed)

# Replace the 'train_ds' registration with the pseudo-labelled predictions.
DatasetCatalog.remove('train_ds')

# Bind the current predictions as a default argument. A plain
# `lambda: get_train_dict(preds, [])` looks up the global `preds` each time the
# dataset is loaded, and a later cell rebinds `preds` to the dataset records.
DatasetCatalog.register("train_ds", lambda preds=preds: get_train_dict(preds, []))

In [None]:
# Load the records of the dataset registered under 'train_ds'.
# NOTE(review): this rebinds `preds`; from here on it holds what
# DatasetCatalog.get returns, not the raw output of the inference pipeline.
preds = DatasetCatalog.get('train_ds')

In [None]:
# Check some images
# Check some images (at most 20; fewer if `preds` is smaller, since
# random.sample raises when asked for more items than exist)
for d in random.sample(preds, min(20, len(preds))):
    plt.figure(figsize=(20, 20))
    img = cv2.imread(d["file_name"])
    # cv2 loads BGR; the Visualizer is given the RGB view.
    visualizer = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("train_ds"), scale=1)
    out = visualizer.draw_dataset_dict(d)
    # get_image() is already RGB, which is what plt.imshow expects; flipping the
    # channels again displayed the picture with red and blue swapped.
    plt.imshow(out.get_image())
    plt.show()

In [None]:
# Check some images
# Check some images: draw up to 10 random records and show those whose file
# name contains 'vianoleo' (random.sample raises when asked for more items
# than exist, hence the min()).
for d in random.sample(preds, min(10, len(preds))):
    if 'vianoleo' in d["file_name"]:
        plt.figure(figsize=(15, 15))
        img = cv2.imread(d["file_name"])
        # cv2 loads BGR; the Visualizer is given the RGB view.
        visualizer = Visualizer(img[:, :, ::-1], metadata=  MetadataCatalog.get("train_ds"), scale=1)
        out = visualizer.draw_dataset_dict(d)
        # get_image() is already RGB, which is what plt.imshow expects; flipping
        # the channels again displayed the picture with red and blue swapped.
        plt.imshow(out.get_image())
        plt.show()

In [None]:
# Register the predictions as training datas
# Register the predictions as training datas.
# By this point `preds` may already hold dataset records (an earlier cell
# rebinds it to DatasetCatalog.get('train_ds')). Those records have no
# 'outputs' key, so converting them again would raise KeyError the first time
# the trainer loads the dataset. Convert only raw predictions.
if preds and 'annotations' in preds[0]:
    pseudo_dicts = preds
else:
    pseudo_dicts = get_train_dict(preds, [])

DatasetCatalog.remove('train_ds')
# Default argument pins the records so later rebinding of globals cannot change the dataset.
DatasetCatalog.register("train_ds", lambda data=pseudo_dicts: data)

In [None]:
# Little training over this new data
# Short fine-tuning run on the pseudo-labelled data.
# NOTE(review): the folder name says R_50 while `net` in the network cell is
# R_101; the following inference cell uses the same folder name, so rename
# both together if this is changed.
cfg.OUTPUT_DIR = 'FT08_{}'.format('faster_rcnn_R_50_FPN_3x.yaml')

# Schedule: 600 iterations, learning-rate steps at 350 and 400.
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = 600
cfg.SOLVER.STEPS = (350, 400)
cfg.SOLVER.IMS_PER_BATCH = 4

# Evaluate and checkpoint every 100 iterations.
cfg.SOLVER.CHECKPOINT_PERIOD = 100
cfg.TEST.EVAL_PERIOD = 100

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)  # start from cfg.MODEL.WEIGHTS as currently set
trainer.train()

## Inference 0.6

In [None]:
# Inference with the fine-tuned checkpoint from iteration 499, detections kept above 0.6 confidence.
cfg.OUTPUT_DIR = 'FT08_{}'.format('faster_rcnn_R_50_FPN_3x.yaml')
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0000499.pth")

# Predict on the unlabelled images (every file in the folder this time)
preds = inference_pipeline('../DATAS/voc.data/Test', cfg)

In [None]:
# Extend datas
# Extend datas: records built from the new predictions come first, then the
# original training records in `train_dict`.
dataset_dict =  get_train_dict(preds, train_dict)

In [None]:
# Size of the original training set vs. the extended one.
len(train_dict), len(dataset_dict)

In [None]:
# Check some images
# Check some images: draw up to 10 random records and show those whose file
# name contains 'vianoleo' (random.sample raises when asked for more items
# than exist, hence the min()).
for d in random.sample(dataset_dict, min(10, len(dataset_dict))):
    if 'vianoleo' in d["file_name"]:
        plt.figure(figsize=(15, 15))
        img = cv2.imread(d["file_name"])
        # cv2 loads BGR; the Visualizer is given the RGB view.
        visualizer = Visualizer(img[:, :, ::-1], metadata=  MetadataCatalog.get("train_ds"), scale=1)
        out = visualizer.draw_dataset_dict(d)
        # get_image() is already RGB, which is what plt.imshow expects; flipping
        # the channels again displayed the picture with red and blue swapped.
        plt.imshow(out.get_image())
        plt.show()