In [1]:
import fiftyone as fo
from detectron2.structures import BoxMode
import sys

# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger

setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from matplotlib import pyplot as plt
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog
import pandas as pd

In [2]:
# Load the dataset stored in FiftyOneDataset format under the relative
# "prepared" directory (resolved against the notebook's working directory).
dataset = fo.Dataset.from_dir(
    dataset_dir="prepared",
    dataset_type=fo.types.FiftyOneDataset,
)


Importing samples...
 100% |█████████████████| 120/120 [41.2ms elapsed, 0s remaining, 2.9K samples/s]      
Import complete


In [None]:
def split_dataset(dataset, test_frac=0.1, valid_frac=0.2, seed=42):
    """Tag every sample of ``dataset`` with exactly one of "test", "valid", "train".

    The split is drawn in two steps: ``test_frac`` of all samples become
    "test"; of the remaining samples, ``valid_frac`` become "valid" and the
    rest become "train".

    Args:
        dataset: the FiftyOne dataset to tag; it is modified in place.
        test_frac: fraction of all samples tagged "test".
        valid_frac: fraction of the non-test samples tagged "valid".
        seed: seed passed to both random ``take()`` calls.

    Returns:
        The same ``dataset`` object, after ``dataset.save()``.
    """
    # Remove split tags left over from a previous call (or shipped with the
    # dataset); otherwise a sample could end up tagged e.g. both "train" and
    # "test". Tags other than these three are left alone.
    dataset.untag_samples(["train", "valid", "test"])

    # Tag test images.
    testset_view = dataset.take(round(test_frac * len(dataset)), seed=seed)
    testset_view.tag_samples("test")

    # Split remaining images into train and valid.
    nontestset_view = dataset.match_tags("test", bool=False)
    validset_view = nontestset_view.take(
        round(valid_frac * len(nontestset_view)), seed=seed
    )
    validset_view.tag_samples("valid")
    nontestset_view.match_tags("valid", bool=False).tag_samples("train")

    dataset.save()
    return dataset

In [3]:
# Class names stored on the dataset; get_fiftyone_dicts() uses each label's
# position in this list as its detectron2 category id.
dataset.default_classes

['cocoa', 'invalid']

In [4]:
def get_fiftyone_dicts(dataset):
    """Convert a FiftyOne dataset or view into detectron2-style dataset dicts.

    Args:
        dataset: a FiftyOne dataset or view whose samples have a
            ``ground_truth`` detections field and whose ``default_classes``
            lists every label that occurs.

    Returns:
        A list with one dict per sample, holding ``file_name``, ``image_id``,
        ``height``, ``width`` and ``annotations``. Each annotation has an
        absolute-pixel ``bbox`` in ``BoxMode.XYWH_ABS`` and a ``category_id``
        equal to the label's index in ``dataset.default_classes``.

    Raises:
        ValueError: if a detection's label is not in ``default_classes``.
    """
    dataset.compute_metadata()

    # Build the label -> index lookup once instead of scanning the class list
    # for every detection. setdefault keeps the first index for a duplicated
    # name, matching list.index().
    class_to_id = {}
    for idx, label in enumerate(dataset.default_classes):
        class_to_id.setdefault(label, idx)

    dataset_dicts = []
    for sample in dataset:
        height = sample.metadata["height"]
        width = sample.metadata["width"]

        # A sample without annotations may hold None in `ground_truth`;
        # treat it as an image with no objects instead of crashing.
        ground_truth = sample.ground_truth
        detections = ground_truth.detections if ground_truth is not None else []

        objs = []
        for det in detections:
            if det.label not in class_to_id:
                raise ValueError(
                    f"{det.label!r} is not in dataset.default_classes"
                )
            # The stored boxes are relative [top-left-x, top-left-y, w, h];
            # scale them to pixels.
            tlx, tly, w, h = det.bounding_box
            objs.append(
                {
                    "bbox": [
                        int(tlx * width),
                        int(tly * height),
                        int(w * width),
                        int(h * height),
                    ],
                    "bbox_mode": BoxMode.XYWH_ABS,
                    "category_id": class_to_id[det.label],
                }
            )

        dataset_dicts.append(
            {
                "file_name": sample.filepath,
                "image_id": sample.id,
                "height": height,
                "width": width,
                "annotations": objs,
            }
        )

    return dataset_dicts


In [5]:
# Register the "train" and "valid" splits with detectron2 under the names
# "fiftyone_train" / "fiftyone_valid".
for d in ["train", "valid"]:
    split_name = "fiftyone_" + d
    view = dataset.match_tags(d)

    # DatasetCatalog.register() asserts that the name is unused, so drop any
    # registration left over from an earlier run of this cell.
    if split_name in DatasetCatalog.list():
        DatasetCatalog.remove(split_name)

    # `view=view` binds the current view at definition time; a plain closure
    # would see only the last loop value.
    DatasetCatalog.register(split_name, lambda view=view: get_fiftyone_dicts(view))

    # Take the class names from the same list that get_fiftyone_dicts() uses
    # for category ids, so names and ids cannot drift apart.
    MetadataCatalog.get(split_name).thing_classes = list(dataset.default_classes)

metadata = MetadataCatalog.get("fiftyone_train")


In [None]:
# Show the metadata object registered for "fiftyone_train".
print(metadata)

In [None]:
# Build the detectron2 records for the "train" split, then open exactly those
# samples (selected by id) in the FiftyOne App.
train_split = dataset.match_tags("train")
dataset_dicts = get_fiftyone_dicts(train_split)
ids = [record["image_id"] for record in dataset_dicts]

view = dataset.select(ids)
session = fo.launch_app(view)

In [None]:
# Freeze the App session that was opened in the previous cell.
session.freeze()  # screenshot the App

In [None]:
# Count the ground-truth detections per label within `view`.
label_count = view.count_values("ground_truth.detections.label")
print(label_count)

In [25]:
from detectron2.engine import DefaultTrainer

# Training configuration, starting from detectron2's defaults.
# NOTE(review): both model-zoo lines below are commented out (and have empty
# names), so no pretrained config or weights are merged here - confirm that
# training the default architecture without a checkpoint is intended.
cfg = get_cfg()
cfg.OUTPUT_DIR = "detectron_files/models"
#cfg.merge_from_file(model_zoo.get_config_file(""))
cfg.DATASETS.TRAIN = ("fiftyone_train",)
# NOTE(review): no test dataset is configured although EVAL_PERIOD is set
# below and "fiftyone_valid" is registered - confirm whether it should be used.
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.TEST.EVAL_PERIOD = 1000
#cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 4  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00001  # pick a good LR
cfg.SOLVER.MAX_ITER = 10000    # total number of training iterations
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.SOLVER.AMP.ENABLED = True        # automatic mixed precision
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64  # The "RoIHead batch size" (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2  # two classes: 'cocoa' and 'invalid' (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)

In [26]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

class CocoTrainer(DefaultTrainer):
    """DefaultTrainer that evaluates with a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        Args:
            cfg: the training config (not used here; part of the
                ``build_evaluator`` signature).
            dataset_name: name of a dataset registered in DatasetCatalog.
            output_folder: directory for evaluation artefacts; defaults to
                ``"coco_eval"`` in the current working directory, which is
                created if needed.

        Returns:
            A non-distributed ``COCOEvaluator`` writing to ``output_folder``.
        """
        if output_folder is None:
            output_folder = "coco_eval"
            os.makedirs(output_folder, exist_ok=True)

        # Pass explicit keyword arguments: handing `cfg` over as the second
        # positional argument (`tasks`) is deprecated and only logs a warning.
        return COCOEvaluator(
            dataset_name,
            distributed=False,
            output_dir=output_folder,
        )

In [27]:
# NOTE: cfg.MODEL.ROI_HEADS.NUM_CLASSES is the number of classes; a few popular
# unofficial tutorials incorrectly use num_classes+1 there. See
# https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# Use the CocoTrainer subclass defined above rather than the bare
# DefaultTrainer, so an evaluator is available whenever cfg.DATASETS.TEST is
# populated. With an empty test set the two train identically.
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[32m[01/17 20:57:18 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): ResNet(
    (stem): BasicStem(
      (conv1): Conv2d(
        3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
        (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
      )
    )
    (res2): Sequential(
      (0): BottleneckBlock(
        (shortcut): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
        )
        (conv1): Conv2d(
          64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv2): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv3): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps



[32m[01/17 20:57:28 d2.utils.events]: [0m eta: 0:55:25  iter: 19  total_loss: 832.2  loss_cls: 277.7  loss_box_reg: 375.2  loss_rpn_cls: 84.25  loss_rpn_loc: 85.15    time: 0.3588  last_time: 0.3971  data_time: 0.0207  last_data_time: 0.0080   lr: 1.9981e-07  max_mem: 4007M
[32m[01/17 20:57:39 d2.utils.events]: [0m eta: 1:16:40  iter: 39  total_loss: 260.1  loss_cls: 176.2  loss_box_reg: 61.44  loss_rpn_cls: 7.111  loss_rpn_loc: 18.38    time: 0.4552  last_time: 0.5646  data_time: 0.0083  last_data_time: 0.0073   lr: 3.9961e-07  max_mem: 4007M
[32m[01/17 20:57:50 d2.utils.events]: [0m eta: 1:33:46  iter: 59  total_loss: 46.21  loss_cls: 22.79  loss_box_reg: 9.357  loss_rpn_cls: 1.579  loss_rpn_loc: 9.947    time: 0.4947  last_time: 0.5679  data_time: 0.0076  last_data_time: 0.0070   lr: 5.9941e-07  max_mem: 4007M
[32m[01/17 20:58:02 d2.utils.events]: [0m eta: 1:34:03  iter: 79  total_loss: 16.42  loss_cls: 4.638  loss_box_reg: 2.466  loss_rpn_cls: 1.053  loss_rpn_loc: 7.786    

KeyboardInterrupt: 

In [20]:
# Inference reuses the training config: `cfg` already holds everything set
# earlier, and only the two values below are changed (in place) for prediction.
trained_weights = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # custom testing threshold
cfg.MODEL.WEIGHTS = trained_weights  # path to the model we just trained
predictor = DefaultPredictor(cfg)

[32m[01/17 20:47:50 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from detectron_files/models/model_final.pth ...


In [21]:
def detectron_to_fo(outputs, img_w, img_h, classes=None):
    """Convert detectron2 predictor output into a ``fo.Detections`` object.

    The output format is documented at
    https://detectron2.readthedocs.io/tutorials/models.html#model-output-format

    Args:
        outputs: predictor output dict; its ``"instances"`` entry provides
            ``pred_boxes``, ``scores`` and ``pred_classes``.
        img_w: width in pixels of the image the boxes refer to.
        img_h: height in pixels of the image the boxes refer to.
        classes: optional list mapping predicted class index to label.
            Defaults to the global ``dataset.default_classes``.

    Returns:
        ``fo.Detections`` whose boxes are ``[x, y, w, h]`` divided by the
        image width and height.
    """
    if classes is None:
        # Backward-compatible default: fall back to the notebook-level
        # dataset, read once rather than once per predicted box.
        classes = dataset.default_classes

    instances = outputs["instances"].to("cpu")

    detections = []
    for pred_box, score, c in zip(
        instances.pred_boxes, instances.scores, instances.pred_classes
    ):
        # Each box is (x1, y1, x2, y2) in pixels; convert it to relative
        # top-left x/y plus width/height.
        x1, y1, x2, y2 = pred_box
        bbox = [
            float(x1) / img_w,
            float(y1) / img_h,
            float(x2 - x1) / img_w,
            float(y2 - y1) / img_h,
        ]
        detections.append(
            fo.Detection(
                label=classes[int(c)],
                confidence=float(score),
                bounding_box=bbox,
            )
        )

    return fo.Detections(detections=detections)

In [22]:
# Run the predictor on every sample and store the result in the sample's
# "predictions" field.
for sample in dataset:
    img = cv2.imread(sample["filepath"])
    if img is None:
        # cv2.imread() signals failure by returning None; fail with the path
        # here instead of an obscure error inside the predictor.
        raise OSError(f"Could not read image: {sample['filepath']}")

    # Normalise by the size of the array that is actually fed to the model.
    # This does not depend on sample.metadata, which may be unset for samples
    # whose metadata has not been computed.
    img_h, img_w = img.shape[:2]

    outputs = predictor(img)
    detections = detectron_to_fo(outputs, img_w, img_h)
    sample["predictions"] = detections
    sample.save()

In [None]:
# Debug peek: `outputs` is whatever the inference loop assigned last, i.e. the
# predictions for the final sample it processed.
print(outputs['instances'].pred_classes)

In [None]:
# Open the full dataset in a FiftyOne session.
# NOTE(review): the earlier cell opens the App with fo.launch_app() and keeps
# the handle in `session`; confirm whether constructing fo.Session directly
# (without keeping a reference) is intended here.
fo.Session(dataset)

In [24]:
# Evaluate the `predictions` field against `ground_truth` separately for each
# split and print mAP, per-class AP, the classification report and the
# TP/FP/FN totals.
splits = ['train','valid','test']
classwise_ap_by_split = {}  # split tag -> per-class AP table
for split_tag in splits:
    view = dataset.match_tags([split_tag])

    # Evaluate the objects in the `predictions`
    # field with respect to the
    # objects in the `ground_truth` field
    eval_key = f"eval_predictions_{split_tag}"
    results = view.evaluate_detections(
        "predictions",
        gt_field="ground_truth",
        eval_key=eval_key,
        compute_mAP=True,
        classes=dataset.default_classes,
        missing="background",
        # whether to consider objects with different label
        # values as always non-overlapping (True) or to compute IoUs
        # for all objects regardless of label (False)
        classwise=True,
    )

    # the COCO mAP evaluator averages the mAP
    # over 10 IoU thresholds from 0.5 to 0.95
    # with a step size of 0.05 (AP@[0.5:0.05:0.95])
    # To be found in the source of fiftyone.
    # "https://github.com/voxel51/fiftyone/blob/"
    # "acf3a8f886505d852903e320d057057813261993/fiftyone/"
    # "utils/eval/coco.py#L91"
    mAP = results.mAP()
    print(f"mAP@[0.5:0.05:0.95] {split_tag} : " + str(mAP))

    # Collect the rows first and build the DataFrame once; growing a frame
    # row by row through the private `_append` is slow and emits a warning.
    classwise_ap_records = []
    for label in dataset.default_classes:
        class_AP = results.mAP([label])
        print(
            f"AP@[0.5:0.05:0.95] of {split_tag} ({label}): "
            + str(class_AP)
        )
        classwise_ap_records.append(
            {"Label": label, "AP@[0.5:0.05:0.95]": class_AP}
        )
    classwise_ap_df = pd.DataFrame(
        classwise_ap_records, columns=["Label", "AP@[0.5:0.05:0.95]"]
    )
    # Keep every split's table; `classwise_ap_df` alone only holds the last one.
    classwise_ap_by_split[split_tag] = classwise_ap_df

    results.print_report()
    report = results.report()
    weighted_avg_precision = report["weighted avg"]["precision"]
    weighted_avg_recall = report["weighted avg"]["recall"]

    # Print some statistics about the total TP/FP/FN counts
    # (despite the `mean_` prefix these are sums over the split).
    mean_tp = view.sum(f"{eval_key}_tp")
    mean_fp = view.sum(f"{eval_key}_fp")
    mean_fn = view.sum(f"{eval_key}_fn")
    print(f"TP ({split_tag}): {mean_tp}")
    print(f"FP ({split_tag}): {mean_fp}")
    print(f"FN ({split_tag}): {mean_fn}")


Evaluating detections...
 100% |███████████████████| 86/86 [21.6s elapsed, 0s remaining, 4.0 samples/s]      
Performing IoU sweep...
 100% |███████████████████| 86/86 [14.4s elapsed, 0s remaining, 6.2 samples/s]      
mAP@[0.5:0.05:0.95] train : 0.3137940867897281
AP@[0.5:0.05:0.95] of train (cocoa): 0.5863435448050294
AP@[0.5:0.05:0.95] of train (invalid): 0.041244628774426854
              precision    recall  f1-score   support

       cocoa       0.92      0.96      0.94      3714
     invalid       0.67      0.19      0.30       586

   micro avg       0.91      0.85      0.88      4300
   macro avg       0.79      0.57      0.62      4300
weighted avg       0.88      0.85      0.85      4300

TP (train): 3664
FP (train): 372
FN (train): 636


  classwise_ap_df = classwise_ap_df._append(


Evaluating detections...
 100% |███████████████████| 22/22 [5.5s elapsed, 0s remaining, 3.9 samples/s]      
Performing IoU sweep...
 100% |███████████████████| 22/22 [3.8s elapsed, 0s remaining, 5.1 samples/s]      
mAP@[0.5:0.05:0.95] valid : 0.2907385663692545
AP@[0.5:0.05:0.95] of valid (cocoa): 0.5618770789468348
AP@[0.5:0.05:0.95] of valid (invalid): 0.019600053791674177
              precision    recall  f1-score   support

       cocoa       0.92      0.94      0.93       963
     invalid       0.40      0.13      0.20       137

   micro avg       0.90      0.84      0.87      1100
   macro avg       0.66      0.54      0.57      1100
weighted avg       0.86      0.84      0.84      1100

TP (valid): 928
FP (valid): 102
FN (valid): 172


  classwise_ap_df = classwise_ap_df._append(


Evaluating detections...
 100% |███████████████████| 12/12 [2.9s elapsed, 0s remaining, 4.1 samples/s]      
Performing IoU sweep...
 100% |███████████████████| 12/12 [1.9s elapsed, 0s remaining, 6.2 samples/s]         
mAP@[0.5:0.05:0.95] test : 0.27768122652848776
AP@[0.5:0.05:0.95] of test (cocoa): 0.5434246878518836
AP@[0.5:0.05:0.95] of test (invalid): 0.011937765205091938
              precision    recall  f1-score   support

       cocoa       0.93      0.93      0.93       537
     invalid       0.36      0.08      0.13        63

   micro avg       0.91      0.84      0.87       600
   macro avg       0.64      0.50      0.53       600
weighted avg       0.87      0.84      0.84       600

TP (test): 503
FP (test): 48
FN (test): 97


  classwise_ap_df = classwise_ap_df._append(
