In [1]:
import torch
import numpy as np
import cv2
from time import time
from ultralytics import YOLO

from supervision.draw.color import ColorPalette
# from supervision.tools.detections import Detections, BoxAnnotator

import supervision as sv

In [2]:
# Pick 'cuda' when PyTorch reports a usable CUDA device, otherwise 'cpu'.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using Device: ", device)

Using Device:  cuda


In [6]:
# Instantiate the detector used by `predict` and by the class-name lookup.
# NOTE(review): `load_model` is defined in a cell that appears further down in
# this notebook (executed as In [3]); on a fresh top-to-bottom run that cell
# must be executed before this one.
model = load_model()

YOLOv8m summary (fused): 218 layers, 25886080 parameters, 0 gradients, 78.9 GFLOPs


In [7]:
# Lookup from class id to class name, read from the loaded model; indexed by
# class id when formatting box labels.
CLASS_NAMES_DICT = model.model.names

In [8]:
# Shared annotator used to draw boxes and their labels onto frames.
box_annotator = sv.BoxAnnotator(
    color=ColorPalette.default(),
    thickness=3,
    text_thickness=3,
    text_scale=1.5,
)

In [4]:
def plot_bboxes(results, frame, class_ids=None):
    """Draw the detections of the first result onto ``frame``.

    Args:
        results: Indexable model output; only ``results[0].boxes`` is read, and
            its ``xyxy``, ``conf`` and ``cls`` tensors are converted to NumPy.
        frame: Image passed to ``box_annotator.annotate`` as the scene.
        class_ids: Optional iterable of integer class ids to keep (for example
            ``[0]``; presumably the person class for this model - verify against
            ``CLASS_NAMES_DICT``). ``None`` (the default) keeps every detection.

    Returns:
        Whatever ``box_annotator.annotate`` returns for the annotated frame.
    """
    boxes = results[0].boxes

    # Convert once for the whole frame instead of once per box.
    xyxy = boxes.xyxy.cpu().numpy()
    confidence = boxes.conf.cpu().numpy()
    class_id = boxes.cls.cpu().numpy().astype(int)

    # Optional class filter, applied to all three arrays with the same mask so
    # boxes, scores and ids stay aligned.
    if class_ids is not None:
        keep = np.isin(class_id, list(class_ids))
        xyxy = xyxy[keep]
        confidence = confidence[keep]
        class_id = class_id[keep]

    # Setup detections for visualization
    detections = sv.Detections(
        xyxy=xyxy,
        confidence=confidence,
        class_id=class_id,
    )

    # Format custom labels straight from the arrays: iterating a Detections
    # object yields tuples whose length differs between supervision releases,
    # so fixed-arity unpacking is fragile.
    labels = [
        f"{CLASS_NAMES_DICT[cid]} {conf:0.2f}"
        for cid, conf in zip(class_id, confidence)
    ]

    # Annotate and return the frame
    frame = box_annotator.annotate(scene=frame, detections=detections, labels=labels)

    return frame

In [5]:
def predict(frame):
    """Run the module-level ``model`` on ``frame`` and return its output as-is."""
    return model(frame)

In [3]:
def load_model(weights="yolov8m.pt"):
    """Load a YOLO model from ``weights`` and fuse its layers.

    Args:
        weights: Weights file name or path handed to ``YOLO``. Defaults to
            ``"yolov8m.pt"``, the checkpoint this notebook has always used.

    Returns:
        The ``YOLO`` instance after ``fuse()`` has been called on it.
    """
    model = YOLO(weights)
    model.fuse()

    return model

In [10]:
# Live detection loop: grab a frame, run the model, annotate, measure FPS.
CAMERA_INDEX = 1
SHOW_WINDOW = False  # set True to overlay the FPS and preview frames in a window
WINDOW_NAME = 'YOLOv8 Detection'

cap = cv2.VideoCapture(CAMERA_INDEX)

# try/finally guarantees the camera is released even when the loop is stopped
# by an exception or a kernel interrupt; otherwise the device stays open and a
# later VideoCapture on the same index may fail to open.
try:
    if not cap.isOpened():
        raise RuntimeError(f"Could not open camera index {CAMERA_INDEX}")

    while True:

        start_time = time()

        ret, frame = cap.read()
        if not ret:
            # A failed grab ends the loop cleanly instead of raising.
            print("Failed to read a frame from the camera; stopping.")
            break

        results = predict(frame)
        frame = plot_bboxes(results, frame)

        elapsed = time() - start_time
        # Guard against a zero interval (coarse clocks / very fast iterations).
        fps = 1 / elapsed if elapsed > 0 else 0.0

        if SHOW_WINDOW:
            cv2.putText(frame, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
            cv2.imshow(WINDOW_NAME, frame)

        # NOTE: per the OpenCV docs, waitKey only receives key presses while a
        # HighGUI window exists and is active, so 'q' can only stop the loop
        # when SHOW_WINDOW is True; otherwise interrupt the kernel.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 (no detections), 32.0ms
Speed: 0.0ms preprocess, 32.0ms inference, 8.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 16.3ms
Speed: 0.0ms preprocess, 16.3ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 14.8ms
Speed: 0.0ms preprocess, 14.8ms inference, 488.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 16.9ms
Speed: 0.0ms preprocess, 16.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 15.8ms
Speed: 0.0ms preprocess, 15.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 17.3ms
Speed: 0.0ms preprocess, 17.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 25.5ms
Speed: 0.0ms preprocess, 25.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 21.6ms
Speed: 0.0ms preprocess, 21.6ms inference, 2.3ms postprocess per image at

In [None]:
# Live detection loop without FPS measurement.
cap = cv2.VideoCapture(1)

# try/finally guarantees the camera is released even when the loop is stopped
# by an exception or a kernel interrupt.
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open camera index 1")

    while True:

        ret, frame = cap.read()
        if not ret:
            # A failed grab ends the loop cleanly instead of raising.
            print("Failed to read a frame from the camera; stopping.")
            break

        results = predict(frame)

        # Reuse the shared helper rather than repeating its body inline.
        frame = plot_bboxes(results, frame)

        # NOTE: per the OpenCV docs, waitKey only receives key presses while a
        # HighGUI window exists and is active; no window is opened in this
        # cell, so stop the loop by interrupting the kernel.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()