In [1]:
import cv2
import torch
from ultralytics import YOLO

from common.yolo.yolo_results_aux import results_to_pose_list, pose_list_to_numpy
from common.yolo.nms import nms_for_yolo_pose
from common.tracker.sort_tracker import SortTracker
from common.yolo.visualization import draw_sort_bboxes, draw_skeletons_without_bboxes

# Load the model and video

In [2]:
# Paths to the pose-model weights and the input video
MODEL_PATH = "/opt/models/yolo/yolo11m-pose.pt"
VIDEO_PATH = "/opt/videos/raining_street_02.mp4"

# Load the YOLO pose model (the variant is determined by the weights file)
model = YOLO(MODEL_PATH)

# Pick the compute device: CUDA first, then Apple MPS, otherwise CPU
if torch.cuda.is_available():
    DEVICE = 'cuda'
elif torch.backends.mps.is_available():
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'

# Move the model to the device
model.to(DEVICE)

# Open the video file
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    # Raise instead of print + exit(): inside a notebook exit() shuts the
    # kernel down, whereas an exception stops the run at this cell with a
    # traceback and keeps the kernel (and the loaded model) alive.
    raise RuntimeError(f"Error: Could not open video: {VIDEO_PATH}")

# Run the program

In [3]:
# Display size in pixels; the same values are passed to cv2.resizeWindow()
# here and to the final cv2.resize() before each frame is shown
window_width, window_height = 1024, 800

# SORT tracker configuration, gathered in one place and expanded into the constructor
tracker_settings = dict(max_age=10, min_hits=1, iou_threshold=0.5, max_objects=100)
tracker = SortTracker(**tracker_settings)

# Create the "Frame" display window, place it at screen position (0, 0)
# and give it the configured size
cv2.namedWindow("Frame", cv2.WINDOW_NORMAL)
cv2.moveWindow("Frame", 0, 0)
cv2.resizeWindow("Frame", window_width, window_height)

# Size (width, height) every frame is resized to before inference.
# NOTE(review): a fixed square size ignores the source aspect ratio; confirm
# this distortion is intended.
inference_size = (460, 460)

# Process the video frame by frame. The try/finally guarantees that the
# capture and the display window are released even when a frame raises or
# the kernel is interrupted mid-loop.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame returned: end of the video or a read failure
            break

        # Resize the frame
        frame = cv2.resize(frame, inference_size)

        # Run pose estimation; the model returns one result per input image
        results = model(frame, verbose=False)[0]  # Get the first result

        # Convert the raw result into a list of poses
        poses = results_to_pose_list(results)

        # Draw the skeletons without the bounding boxes.
        # NOTE(review): this draws the poses *before* NMS, so skeletons that
        # NMS later discards still appear on screen; confirm this is intended.
        draw_skeletons_without_bboxes(frame, poses, show_names=False)

        # Apply NMS to the poses
        poses = nms_for_yolo_pose(poses)

        # Convert the list of poses to a numpy array
        poses_array = pose_list_to_numpy(poses)

        # Perform object tracking
        tracked_objects = tracker.update(poses_array)

        # Draw the tracked bounding boxes
        draw_sort_bboxes(frame, tracked_objects)

        # Scale the annotated frame to the window size and display it
        frame = cv2.resize(frame, (window_width, window_height))
        cv2.imshow("Frame", frame)

        # Wait up to 10 ms for a key press; 'q' stops playback
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture object and close the window
    cap.release()
    cv2.destroyAllWindows()