### Installations & Imports

In [None]:
!pip uninstall flask
!pip install fiftyone
!pip install torch torchvision
!pip uninstall urllib3
!pip install urllib3
!pip install ipywidgets>=7.5

In [71]:
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
from matplotlib import pyplot as plt
import math
import torch
import torchvision
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F
from PIL import Image
from torchvision.transforms import functional as func

In [72]:
# Run the model on GPU if it is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Loading COCO 2017 

In [73]:
# Load COCO 2017 validation set
coco = foz.load_zoo_dataset(
    "coco-2017",
    split="validation"
)
coco.persistent = True

# Get class list
coco_classes = coco.default_classes

Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


Images already downloaded


INFO:fiftyone.utils.coco:Images already downloaded


Existing download of split 'validation' is sufficient


INFO:fiftyone.zoo.datasets:Existing download of split 'validation' is sufficient


Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [34]:
coco_ds = coco.take(1000, seed=51)

In [35]:
print(coco_ds)

Dataset:     coco-2017-validation
Media type:  image
Num samples: 1000
Sample fields:
    id:                  fiftyone.core.fields.ObjectIdField
    filepath:            fiftyone.core.fields.StringField
    tags:                fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:            fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    faster_rcnn:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    ssd:                 fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    retinanet:           fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    eval_faster_RCNN_tp: fiftyone.core.fields.IntField
    eval_faster_RCNN_fp: fiftyone.core.fields.IntField
    eval_faster_RCNN_fn: fiftyone.core.fields.IntField
    eval_ssd_tp:         fiftyo

### Loading PASCAL VOC 2012

In [6]:
voc = foz.load_zoo_dataset(
    "voc-2012",
    split="validation"
)
voc.persistent = True

# Get class list
voc_classes = voc.default_classes

Downloading split 'validation' to '/root/fiftyone/voc-2012/validation'


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/voc-2012/validation'


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to /root/fiftyone/voc-2012/tmp-download/VOCtrainval_11-May-2012.tar


  0%|          | 0/1999639040 [00:00<?, ?it/s]

Extracting /root/fiftyone/voc-2012/tmp-download/VOCtrainval_11-May-2012.tar to /root/fiftyone/voc-2012/tmp-download
 100% |███████████████| 5823/5823 [1.3m elapsed, 0s remaining, 76.8 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5823/5823 [1.3m elapsed, 0s remaining, 76.8 samples/s]      


Dataset info written to '/root/fiftyone/voc-2012/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/voc-2012/info.json'


Loading 'voc-2012' split 'validation'


INFO:fiftyone.zoo.datasets:Loading 'voc-2012' split 'validation'


 100% |███████████████| 5823/5823 [17.4s elapsed, 0s remaining, 269.5 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5823/5823 [17.4s elapsed, 0s remaining, 269.5 samples/s]      


Dataset 'voc-2012-validation' created


INFO:fiftyone.zoo.datasets:Dataset 'voc-2012-validation' created


In [36]:
voc_ds = voc.take(1000, seed=51)

In [37]:
print(voc_ds)

Dataset:     voc-2012-validation
Media type:  image
Num samples: 1000
Sample fields:
    id:                  fiftyone.core.fields.ObjectIdField
    filepath:            fiftyone.core.fields.StringField
    tags:                fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:            fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    faster_rcnn:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    ssd:                 fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    retinanet:           fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    eval_faster_RCNN_tp: fiftyone.core.fields.IntField
    eval_faster_RCNN_fp: fiftyone.core.fields.IntField
    eval_faster_RCNN_fn: fiftyone.core.fields.IntField
    eval_ssd_tp:         fiftyon

### Loading models

In [None]:
# Load a pre-trained Faster R-CNN model
faster_RCNN = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
faster_RCNN.to(device)
faster_RCNN.eval()

# Load a pre-trained SSD model
ssd = torchvision.models.detection.ssd300_vgg16(pretrained=True)
ssd.to(device)
ssd.eval()

# Load a pre-trained RetinaNet model
retinaNet = torchvision.models.detection.retinanet_resnet50_fpn_v2(pretrained=True)
retinaNet.to(device)
retinaNet.eval()

### Get Detections for Evaluation

In [23]:
def get_detections(model, image):
  c, h, w = image.shape
  preds = model([image])[0]
  labels = preds["labels"].cpu().detach().numpy()
  scores = preds["scores"].cpu().detach().numpy()
  boxes = preds["boxes"].cpu().detach().numpy()

  detections = []
  for label, score, box in zip(labels, scores, boxes):
    # Convert to [top-left-x, top-left-y, width, height] in relative coordinates in [0, 1] x [0, 1]
    x1, y1, x2, y2 = box
    rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]

    detections.append(
        fo.Detection(
            label=coco_classes[label],
            bounding_box=rel_box,
            confidence=score
        )
    )

  return detections

In [22]:
def predict(ds):
  # Add predictions to samples
  with fo.ProgressBar() as pb:
      for sample in pb(ds):
          # Load image
          image = Image.open(sample.filepath)
          image = func.to_tensor(image).to(device)
          
          # Perform inference
          detections_faster_RCNN = get_detections(faster_RCNN, image)
          detections_ssd = get_detections(ssd, image)
          detections_retinaNet = get_detections(retinaNet, image)

          # Save predictions to dataset
          sample["faster_rcnn"] = fo.Detections(detections=detections_faster_RCNN)
          sample["ssd"] = fo.Detections(detections=detections_ssd)
          sample["retinanet"] = fo.Detections(detections=detections_retinaNet)
          sample.save()

  print("Finished adding predictions")

In [38]:
predict(coco_ds)

 100% |███████████████| 1000/1000 [7.5m elapsed, 0s remaining, 2.4 samples/s]       


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [7.5m elapsed, 0s remaining, 2.4 samples/s]       


Finished adding predictions


In [39]:
predict(voc_ds)

 100% |███████████████| 1000/1000 [7.0m elapsed, 0s remaining, 2.0 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [7.0m elapsed, 0s remaining, 2.0 samples/s]      


Finished adding predictions


In [74]:
session = fo.launch_app()

In [27]:
common_classes=['person', 'car', 'dog', 'sheep', 'bottle', 'cat', 'cow', 
                'horse', 'bicycle', 'boat', 'bus', 'train', 'chair']

In [50]:
filtered_coco_faster_RCNN = coco_ds.filter_labels("faster_rcnn", F("confidence") > 0.50, only_matches=False)
filtered_coco_ssd = coco_ds.filter_labels("ssd", F("confidence") > 0.30, only_matches=False)
filtered_coco_retinaNet = coco_ds.filter_labels("retinanet", F("confidence") > 0.40, only_matches=False)

filtered_voc_faster_RCNN = voc_ds.filter_labels("faster_rcnn", F("confidence") > 0.50, only_matches=False)
filtered_voc_ssd = voc_ds.filter_labels("ssd", F("confidence") > 0.30, only_matches=False)
filtered_voc_retinaNet = voc_ds.filter_labels("retinanet", F("confidence") > 0.40, only_matches=False)

In [51]:
faster_RCNN_coco_results = filtered_coco_faster_RCNN.evaluate_detections(
    "faster_rcnn", gt_field="ground_truth", eval_key="eval_faster_RCNN", classes=coco_classes, compute_mAP=True)

ssd_coco_results = filtered_coco_ssd.evaluate_detections(
    "ssd", gt_field="ground_truth", eval_key="eval_ssd", classes=coco_classes, compute_mAP=True)

retinaNet_coco_results = filtered_coco_retinaNet.evaluate_detections(
    "retinanet", gt_field="ground_truth", eval_key="eval_retinanet", classes=coco_classes, compute_mAP=True)

faster_RCNN_voc_results = filtered_voc_faster_RCNN.evaluate_detections(
    "faster_rcnn", gt_field="ground_truth", eval_key="eval_faster_RCNN", classes=common_classes, compute_mAP=True)

ssd_voc_results = filtered_voc_ssd.evaluate_detections(
    "ssd", gt_field="ground_truth", eval_key="eval_ssd", classes=common_classes, compute_mAP=True)

retinaNet_voc_results = filtered_voc_retinaNet.evaluate_detections(
    "retinanet", gt_field="ground_truth", eval_key="eval_retinanet", classes=common_classes, compute_mAP=True)

Evaluating detections...


INFO:fiftyone.utils.eval.detection:Evaluating detections...


 100% |███████████████| 1000/1000 [56.4s elapsed, 0s remaining, 19.4 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [56.4s elapsed, 0s remaining, 19.4 samples/s]      


Performing IoU sweep...


INFO:fiftyone.utils.eval.coco:Performing IoU sweep...


 100% |███████████████| 1000/1000 [25.2s elapsed, 0s remaining, 39.9 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [25.2s elapsed, 0s remaining, 39.9 samples/s]      


Evaluating detections...


INFO:fiftyone.utils.eval.detection:Evaluating detections...


 100% |███████████████| 1000/1000 [33.1s elapsed, 0s remaining, 25.1 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [33.1s elapsed, 0s remaining, 25.1 samples/s]      


Performing IoU sweep...


INFO:fiftyone.utils.eval.coco:Performing IoU sweep...


 100% |███████████████| 1000/1000 [16.3s elapsed, 0s remaining, 58.3 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [16.3s elapsed, 0s remaining, 58.3 samples/s]      


Evaluating detections...


INFO:fiftyone.utils.eval.detection:Evaluating detections...


 100% |███████████████| 1000/1000 [42.4s elapsed, 0s remaining, 23.3 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [42.4s elapsed, 0s remaining, 23.3 samples/s]      


Performing IoU sweep...


INFO:fiftyone.utils.eval.coco:Performing IoU sweep...


 100% |███████████████| 1000/1000 [22.1s elapsed, 0s remaining, 45.5 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [22.1s elapsed, 0s remaining, 45.5 samples/s]      


Evaluating detections...


INFO:fiftyone.utils.eval.detection:Evaluating detections...


 100% |███████████████| 1000/1000 [28.2s elapsed, 0s remaining, 43.0 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [28.2s elapsed, 0s remaining, 43.0 samples/s]      


Performing IoU sweep...


INFO:fiftyone.utils.eval.coco:Performing IoU sweep...


 100% |███████████████| 1000/1000 [12.1s elapsed, 0s remaining, 89.9 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [12.1s elapsed, 0s remaining, 89.9 samples/s]      


Evaluating detections...


INFO:fiftyone.utils.eval.detection:Evaluating detections...


 100% |███████████████| 1000/1000 [14.4s elapsed, 0s remaining, 71.5 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [14.4s elapsed, 0s remaining, 71.5 samples/s]      


Performing IoU sweep...


INFO:fiftyone.utils.eval.coco:Performing IoU sweep...


 100% |███████████████| 1000/1000 [8.8s elapsed, 0s remaining, 119.2 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [8.8s elapsed, 0s remaining, 119.2 samples/s]      


Evaluating detections...


INFO:fiftyone.utils.eval.detection:Evaluating detections...


 100% |███████████████| 1000/1000 [20.4s elapsed, 0s remaining, 59.8 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [20.4s elapsed, 0s remaining, 59.8 samples/s]      


Performing IoU sweep...


INFO:fiftyone.utils.eval.coco:Performing IoU sweep...


 100% |███████████████| 1000/1000 [10.6s elapsed, 0s remaining, 103.1 samples/s]     


INFO:eta.core.utils: 100% |███████████████| 1000/1000 [10.6s elapsed, 0s remaining, 103.1 samples/s]     


In [52]:
# Print a classification report -- Faster R-CNN -- COCO dataset
faster_RCNN_coco_results.print_report(classes=common_classes)
print("mAP = ", faster_RCNN_coco_results.mAP())
plot = faster_RCNN_coco_results.plot_pr_curves(classes=common_classes)
plot.show()

              precision    recall  f1-score   support

      person       0.76      0.84      0.80      2499
         car       0.67      0.74      0.71       545
         dog       0.61      0.68      0.65        44
       sheep       0.63      0.72      0.67        57
      bottle       0.47      0.65      0.54       187
         cat       0.70      0.92      0.80        38
         cow       0.76      0.74      0.75       134
       horse       0.78      0.86      0.82        50
     bicycle       0.54      0.55      0.55        65
        boat       0.48      0.60      0.54        70
         bus       0.67      0.69      0.68        51
       train       0.62      0.78      0.69        36
       chair       0.46      0.51      0.48       305

   micro avg       0.69      0.78      0.73      4081
   macro avg       0.63      0.71      0.67      4081
weighted avg       0.70      0.78      0.74      4081

mAP =  0.35487406849841835


In [65]:
faster_RCNN_coco_results.plot_confusion_matrix(classes=common_classes).show()

In [53]:
# Print a classification report -- SSD -- COCO dataset
ssd_coco_results.print_report(classes=common_classes)
print("mAP = ", ssd_coco_results.mAP())
plot = ssd_coco_results.plot_pr_curves(classes=common_classes)
plot.show()

              precision    recall  f1-score   support

      person       0.89      0.53      0.66      2247
         car       0.76      0.31      0.44       457
         dog       0.69      0.57      0.62        44
       sheep       0.72      0.23      0.35        57
      bottle       0.76      0.20      0.32       187
         cat       0.87      0.87      0.87        38
         cow       0.84      0.35      0.49       120
       horse       0.82      0.64      0.72        50
     bicycle       0.82      0.22      0.34        65
        boat       0.64      0.11      0.19        62
         bus       0.94      0.57      0.71        51
       train       0.93      0.78      0.85        36
       chair       0.64      0.23      0.33       304

   micro avg       0.85      0.44      0.58      3718
   macro avg       0.79      0.43      0.53      3718
weighted avg       0.83      0.44      0.57      3718

mAP =  0.21310037432771503


In [63]:
ssd_coco_results.plot_confusion_matrix(classes=common_classes).show()

In [54]:
# Print a classification report -- RetinaNet -- COCO dataset
retinaNet_coco_results.print_report(classes=common_classes)
print("mAP = ", retinaNet_coco_results.mAP())
plot = retinaNet_coco_results.plot_pr_curves(classes=common_classes)
plot.show()

              precision    recall  f1-score   support

      person       0.85      0.75      0.80      2314
         car       0.74      0.63      0.68       471
         dog       0.70      0.70      0.70        44
       sheep       0.81      0.67      0.73        57
      bottle       0.69      0.49      0.57       187
         cat       0.72      0.89      0.80        38
         cow       0.90      0.70      0.79       128
       horse       0.84      0.82      0.83        50
     bicycle       0.78      0.58      0.67        65
        boat       0.61      0.31      0.41        62
         bus       0.89      0.76      0.82        51
       train       0.74      0.81      0.77        36
       chair       0.62      0.45      0.52       304

   micro avg       0.81      0.69      0.74      3807
   macro avg       0.76      0.66      0.70      3807
weighted avg       0.80      0.69      0.74      3807

mAP =  0.3735687967508535


In [66]:
retinaNet_coco_results.plot_confusion_matrix(classes=common_classes).show()

In [55]:
# Print a classification report -- Faster R-CNN -- PASCAL VOC 2012 dataset
faster_RCNN_voc_results.print_report(classes=common_classes)
print("mAP = ", faster_RCNN_voc_results.mAP())
plot = faster_RCNN_voc_results.plot_pr_curves(classes=common_classes)
plot.show()

              precision    recall  f1-score   support

      person       0.60      0.90      0.72       921
         car       0.58      0.83      0.68       237
         dog       0.68      0.95      0.79       139
       sheep       0.71      0.63      0.67        73
      bottle       0.44      0.75      0.55        80
         cat       0.77      0.93      0.84       114
         cow       0.79      0.76      0.78        71
       horse       0.72      0.91      0.80        69
     bicycle       0.56      0.93      0.70        54
        boat       0.38      0.78      0.51        77
         bus       0.72      0.89      0.80        53
       train       0.75      0.89      0.81        46
       chair       0.35      0.63      0.45       209

   micro avg       0.58      0.85      0.69      2143
   macro avg       0.62      0.83      0.70      2143
weighted avg       0.59      0.85      0.69      2143

mAP =  0.5044338340682839


In [67]:
faster_RCNN_voc_results.plot_confusion_matrix(classes=common_classes).show()

In [56]:
# Print a classification report -- SSD -- PASCAL VOC 2012 dataset
ssd_voc_results.print_report(classes=common_classes)
print("mAP = ", ssd_voc_results.mAP())
plot = ssd_voc_results.plot_pr_curves(classes=common_classes)
plot.show()

              precision    recall  f1-score   support

      person       0.86      0.71      0.78       921
         car       0.89      0.61      0.72       237
         dog       0.84      0.86      0.85       139
       sheep       0.87      0.62      0.72        73
      bottle       0.68      0.26      0.38        80
         cat       0.85      0.88      0.87       114
         cow       0.75      0.61      0.67        71
       horse       0.90      0.83      0.86        69
     bicycle       0.87      0.76      0.81        54
        boat       0.83      0.44      0.58        77
         bus       0.88      0.79      0.83        53
       train       0.88      0.78      0.83        46
       chair       0.63      0.43      0.51       209

   micro avg       0.84      0.67      0.74      2143
   macro avg       0.83      0.66      0.72      2143
weighted avg       0.83      0.67      0.73      2143

mAP =  0.4406238848034472


In [68]:
ssd_voc_results.plot_confusion_matrix(classes=common_classes).show()

In [57]:
# Print a classification report -- RetinaNet -- PASCAL VOC 2012 dataset
retinaNet_voc_results.print_report(classes=common_classes)
print("mAP = ", retinaNet_voc_results.mAP())
plot = retinaNet_voc_results.plot_pr_curves(classes=common_classes)
plot.show()

              precision    recall  f1-score   support

      person       0.73      0.87      0.79       921
         car       0.76      0.81      0.78       237
         dog       0.73      0.94      0.82       139
       sheep       0.83      0.67      0.74        73
      bottle       0.64      0.64      0.64        80
         cat       0.77      0.95      0.85       114
         cow       0.79      0.77      0.78        71
       horse       0.80      0.88      0.84        69
     bicycle       0.76      0.94      0.84        54
        boat       0.60      0.70      0.65        77
         bus       0.80      0.91      0.85        53
       train       0.86      0.83      0.84        46
       chair       0.49      0.64      0.55       209

   micro avg       0.71      0.83      0.77      2143
   macro avg       0.74      0.81      0.77      2143
weighted avg       0.72      0.83      0.77      2143

mAP =  0.5691832933285179


In [69]:
retinaNet_voc_results.plot_confusion_matrix(classes=common_classes).show()