In [1]:
%pip install -q keras-retinanet

Note: you may need to restart the kernel to use updated packages.


## Loading a Pretrained Keras Model

In [1]:
# Import the urlretrieve function from the urllib.request module and the os module.
from urllib.request import urlretrieve
import os

# Define the name of the pretrained model, the backbone name used in the model, and the base URL for downloading the model.
PRETRAINED_MODEL_NAME = "resnet50_coco_best_v2.1.0.h5"
BACKBONE_NAME = 'resnet50'
PRETRAINED_BASE_URL = (
    "https://github.com/fizyr/keras-retinanet/"
    "releases/download/0.5.1/")

# If the pretrained model file does not exist:
if not os.path.exists(PRETRAINED_MODEL_NAME):
    # Construct the full URL of the model.
    model_url = PRETRAINED_BASE_URL + PRETRAINED_MODEL_NAME
    
    # Display a message indicating that the download process is starting.
    print(f"Downloading {model_url}...")
    
    # Download the model using the urlretrieve function and save it with the same name.
    urlretrieve(model_url, PRETRAINED_MODEL_NAME)
    
    # Display a message indicating that the download process is complete.
    print("done.")

Downloading https://github.com/fizyr/keras-retinanet/releases/download/0.5.1/resnet50_coco_best_v2.1.0.h5...
done.


In [2]:
# Import the 'models' module from the 'keras_retinanet' package.
from keras_retinanet import models

# Load a pre-trained RetinaNet model using the specified model file name and backbone name.
model = models.load_model(PRETRAINED_MODEL_NAME, backbone_name=BACKBONE_NAME)







## Detecting Objects (Location and Classes) in Test Images

We need to define a label to names mapping for visualization purposes: those labels match the classes from the COCO dataset:

In [3]:
labels_to_names = {
    0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane',
    5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
    10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
    14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
    20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack',
    25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
    30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
    34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard',
    37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass',
    41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl',
    46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli',
    51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake',
    56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed',
    60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse',
    65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave',
    69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book',
    74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
    78: 'hair drier', 79: 'toothbrush'}

In [None]:
# Import necessary functions and modules from keras_retinanet.utils
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color

# Import additional libraries for visualization and timing
import matplotlib.pyplot as plt
import time
import cv2
import numpy as np

# Define a function for object detection and visualization
def detect_and_visualize(image_bgr):
    # Create a copy of the input image for visualization and convert it to RGB color format
    draw = image_bgr.copy()
    draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

    # Create a copy of the input image for processing
    image_input = image_bgr.copy()
    
    # Preprocess the input image using functions from keras_retinanet.utils.image
    image_input = preprocess_image(image_input)
    
    # Resize the preprocessed image and obtain the scale factor
    image_input, scale = resize_image(image_input)
    
    # Display information about the processed image (shape, dtype, and value range)
    print(f"shape: {image_input.shape}, dtype: {image_input.dtype}, "
          f"range: {(image_input.min(), image.max())}")

    # Record the start time for processing
    start = time.time()
    
    # Perform object detection on the preprocessed image using the loaded model
    boxes, scores, labels = model.predict_on_batch(
        np.expand_dims(image_input, axis=0))
    
    # Display the processing time
    print(f"processing time: {time.time() - start:.1f}s")

    # Adjust the bounding box coordinates based on the scale factor
    boxes /= scale

    # Iterate through detected objects and visualize them on the image
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # If the detection score is below a threshold (0.4), skip further processing
        if score < 0.4:
            break

        # Assign a unique color to each object class
        color = label_color(label)

        # Convert bounding box coordinates to integers
        b = box.astype(int)
        
        # Draw the bounding box on the visualization image
        draw_box(draw, b, color=color)

        # Create a caption with object label and detection score
        caption = "{} {:.3f}".format(labels_to_names[label], score)
        print(caption)
        
        # Draw the caption on the visualization image
        draw_caption(draw, b, caption)

    # Display the visualization image using matplotlib
    plt.figure(figsize=(8, 8))
    plt.axis('off')
    plt.imshow(draw)

# Example usage: Read an image from file, perform detection, and visualize the results
image = read_image_bgr('webcam_shot.jpeg')
detect_and_visualize(image)

shape: (800, 1067, 3), dtype: float32, range: (-123.68, 255)


## Real World Data

Let's play with the laptop webcam:

In [None]:
# Import necessary libraries and modules
import cv2
from keras_retinanet.utils.image import read_image_bgr
import matplotlib.pyplot as plt

# Define a function to grab a snapshot from a camera
def camera_grab(camera_id=0, fallback_filename='webcam_shot.jpeg'):
    # Open a connection to the camera with the specified camera_id
    camera = cv2.VideoCapture(camera_id)
    try:
        # Take 10 consecutive snapshots to allow the camera to tune itself
        # and hope that the contrast and lighting of the last snapshot are good enough.
        for i in range(10):
            snapshot_ok, image = camera.read()
        # If snapshot is not successful, print a warning and use a fallback image
        if not snapshot_ok:
            print("WARNING: could not access camera")
            # If a fallback filename is provided, read an image from that file
            if fallback_filename:
                image = read_image_bgr(fallback_filename)
    finally:
        # Release the camera connection
        camera.release()
    
    # Return the captured image
    return image

In [None]:
# Capture an image from the specified camera (camera_id=0) using the camera_grab function
image = camera_grab(camera_id=0)

# Display the captured image using matplotlib
plt.figure(figsize=(8, 8))
# Convert the image from BGR to RGB color format for proper display
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')  # Do not show axis ticks
plt.show()  # Display the image

In [None]:
# Call the detect_and_visualize function to perform object detection and visualization on the captured image
detect_and_visualize(image)