# transformers: Object detection

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torchvision.utils import draw_bounding_boxes
import requests
from PIL import Image
from transformers import (
    pipeline,
    AutoImageProcessor,
    AutoModelForObjectDetection
)

## Load image

In [None]:
# load image
url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/segmentation_input.jpg'

# Stream the download with a timeout so an unresponsive server cannot hang the
# notebook, and fail fast on an HTTP error instead of handing an error page
# to PIL.
with requests.get(url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # convert() reads the pixel data before the connection is released and
    # guarantees a 3-channel image, which the plotting cells rely on when
    # they permute the pixel array with permute(2, 0, 1).
    image = Image.open(response.raw).convert('RGB')

In [None]:
# display the input image on a single axes
pixels = np.asarray(image)

fig = plt.figure(figsize=(6, 4))
ax = fig.subplots()
ax.imshow(pixels)
ax.set_aspect('equal', adjustable='box')
fig.tight_layout()

## Load model

In [None]:
# set model name
# (checkpoint identifier handed to `from_pretrained` and `pipeline` in the
# cells below, so the manual path and the pipeline path use the same weights)
model_name = 'facebook/detr-resnet-50'

In [None]:
# load the detection model and switch it to evaluation mode
model = AutoModelForObjectDetection.from_pretrained(
    model_name,
    device_map='auto',
).eval()

# create the image processor from the same checkpoint
processor = AutoImageProcessor.from_pretrained(model_name)

In [None]:
# build the end-to-end pipeline (preprocessor, model and postprocessor)
pipe = pipeline(
    task='object-detection',
    model=model_name,
    device_map='auto',
)

## Run model

In [None]:
# turn the PIL image into a batch of model inputs
preprocessed_inputs = processor([image], return_tensors='pt')
# keep a handle on the pixel tensor before the inputs are moved to the device
x = preprocessed_inputs['pixel_values']

# forward pass on the model's device, without gradient tracking
device_inputs = preprocessed_inputs.to(model.device)
with torch.no_grad():
    outputs = model(**device_inputs)

# bring the raw predictions back to the CPU for inspection
logits, bboxes = outputs.logits.cpu(), outputs.pred_boxes.cpu()

# postprocess the raw outputs into detections for the image's (height, width),
# using a 0.5 score threshold
image_size = (image.height, image.width)
detections = processor.post_process_object_detection(
    outputs,
    target_sizes=[image_size],
    threshold=0.5,
)

print(f'Images shape: {x.shape}')
print(f'Logits shape: {logits.shape}')
print(f'BBoxes shape: {bboxes.shape}')

In [None]:
# print one line per detection of the first (and only) image in the batch
first_image = detections[0]
triples = zip(first_image['scores'], first_image['labels'], first_image['boxes'])

for score, class_id, raw_box in triples:
    # map the numeric class id to its name via the model config
    label = model.config.id2label[class_id.item()]
    box = [round(value, 2) for value in raw_box.tolist()]
    print(f'{label} ({score:.2f}) in {box}')

In [None]:
# show predictions
image_array = np.array(image)
image_tensor = torch.as_tensor(image_array)

# class names for the detections of the first (and only) image in the batch
class_names = [
    model.config.id2label[class_id]
    for class_id in detections[0]['labels'].tolist()
]

# draw_bounding_boxes works on a channels-first tensor, so the pixel array is
# permuted for drawing and permuted back afterwards for matplotlib
image_tensor = draw_bounding_boxes(
    image_tensor.permute(2, 0, 1),
    boxes=detections[0]['boxes'],
    labels=class_names,
).permute(1, 2, 0)

fig, ax = plt.subplots(figsize=(6, 4))
ax.imshow(image_tensor.numpy())
ax.set_aspect('equal', adjustable='box')
ax.set_title('Predictions')
fig.tight_layout()

## Run pipeline

In [None]:
# run pipeline
# (the result is consumed below as a sequence of dicts with 'score', 'label'
# and 'box' entries, where 'box' holds 'xmin', 'ymin', 'xmax' and 'ymax')
results = pipe(image)

# print the raw pipeline output
print(results)

In [None]:
# repack the pipeline results into a dict of tensors
# ('scores', 'labels', 'boxes') with one row per detection
num_detections = len(results)

detections = {
    'scores': torch.tensor(
        [det['score'] for det in results],
        dtype=torch.float32,
    ),
    # the pipeline reports class names; map them back to numeric ids
    'labels': torch.tensor(
        [model.config.label2id[det['label']] for det in results],
        dtype=torch.int64,
    ),
    # explicit reshape keeps the (N, 4) layout even when there are no results
    'boxes': torch.tensor(
        [
            [det['box'][side] for side in ('xmin', 'ymin', 'xmax', 'ymax')]
            for det in results
        ],
        dtype=torch.float32,
    ).reshape(num_detections, 4),
}

In [None]:
# print one line per reformatted pipeline detection
rows = zip(detections['scores'], detections['labels'], detections['boxes'])

for score, class_id, raw_box in rows:
    # map the numeric class id to its name via the model config
    label = model.config.id2label[class_id.item()]
    box = [round(value, 2) for value in raw_box.tolist()]
    print(f'{label} ({score:.2f}) in {box}')

In [None]:
# show predictions
image_array = np.array(image)
image_tensor = torch.as_tensor(image_array)

# class names for the reformatted pipeline detections
class_names = [
    model.config.id2label[class_id]
    for class_id in detections['labels'].tolist()
]

# draw_bounding_boxes works on a channels-first tensor, so the pixel array is
# permuted for drawing and permuted back afterwards for matplotlib
image_tensor = draw_bounding_boxes(
    image_tensor.permute(2, 0, 1),
    boxes=detections['boxes'],
    labels=class_names,
).permute(1, 2, 0)

fig, ax = plt.subplots(figsize=(6, 4))
ax.imshow(image_tensor.numpy())
ax.set_aspect('equal', adjustable='box')
ax.set_title('Predictions')
fig.tight_layout()