In [6]:
import fiftyone as fo
import fiftyone.zoo as foz

dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    label_types=["detections"],
    classes=["person"],
    max_samples=100,
    dataset_name="detector-recipe",
    # classes=["person"],
    only_matching=True
)

Downloading split 'validation' to 'C:\Users\01149762\fiftyone\coco-2017\validation' if necessary
Found annotations at 'C:\Users\01149762\fiftyone\coco-2017\raw\instances_val2017.json'
93 images found; downloading the remaining 7
 100% |██████████████████████| 7/7 [958.6ms elapsed, 0s remaining, 7.3 images/s]      
Writing annotations for 107 downloaded samples to 'C:\Users\01149762\fiftyone\coco-2017\validation\labels.json'
Dataset info written to 'C:\Users\01149762\fiftyone\coco-2017\info.json'
Loading existing dataset 'detector-recipe'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [7]:
print(dataset)

Name:        detector-recipe
Media type:  image
Num samples: 100
Persistent:  False
Tags:        ['validation']
Sample fields:
    id:           fiftyone.core.fields.ObjectIdField
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)


In [8]:
sample = next(iter(dataset))
print(sample.)

<Sample: {
    'id': '625c1b08e408cac9cdd9b15b',
    'media_type': 'image',
    'filepath': 'C:\\Users\\01149762\\fiftyone\\coco-2017\\validation\\data\\000000026204.jpg',
    'tags': BaseList(['validation']),
    'metadata': <ImageMetadata: {
        'size_bytes': None,
        'mime_type': None,
        'width': 640,
        'height': 427,
        'num_channels': None,
    }>,
    'ground_truth': <Detections: {
        'detections': BaseList([
            <Detection: {
                'id': '625c1b08e408cac9cdd9b14f',
                'attributes': BaseDict({}),
                'tags': BaseList([]),
                'label': 'car',
                'bounding_box': BaseList([
                    0.585359375,
                    0.604192037470726,
                    0.21040625,
                    0.16988290398126465,
                ]),
                'mask': None,
                'confidence': None,
                'index': None,
                'supercategory': 'vehicle',
           

In [10]:
session = fo.launch_app(dataset)

In [5]:
import torch
import torchvision

# Run the model on GPU if it is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load a pre-trained Faster R-CNN model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.to(device)
model.eval()

print("Model ready")

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\01149762/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100.0%


Model ready


In [6]:
# Choose a random subset of 100 samples to add predictions to
predictions_view = dataset.take(100, seed=51)

In [8]:
from PIL import Image
from torchvision.transforms import functional as func

import fiftyone as fo

# Get class list
classes = dataset.default_classes

# Add predictions to samples
with fo.ProgressBar() as pb:
    for sample in pb(predictions_view):
        # Load image
        image = Image.open(sample.filepath)
        image = func.to_tensor(image).to(device)
        c, h, w = image.shape

        # Perform inference
        preds = model([image])[0]
        labels = preds["labels"].cpu().detach().numpy()
        scores = preds["scores"].cpu().detach().numpy()
        boxes = preds["boxes"].cpu().detach().numpy()

        # Convert detections to FiftyOne format
        detections = []
        for label, score, box in zip(labels, scores, boxes):
            # Convert to [top-left-x, top-left-y, width, height]
            # in relative coordinates in [0, 1] x [0, 1]
            x1, y1, x2, y2 = box
            rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]

            detections.append(
                fo.Detection(
                    label=classes[label],
                    bounding_box=rel_box,
                    confidence=score
                )
            )

        # Save predictions to dataset
        sample["predictions"] = fo.Detections(detections=detections)
        sample.save()

 100% |█████████████████| 100/100 [15.3s elapsed, 0s remaining, 6.7 samples/s]      


In [9]:
session.view = predictions_view
