In [4]:
from collections import defaultdict
import cv2
import numpy as np

from ultralytics import YOLO

In [5]:
# Per-track history: maps a track ID to the list of centre points recorded so far.
# A missing ID starts out with an empty list.
track_history = defaultdict(list)

# Source video to run the tracker on
video_path = "/media/saivt/DATA/Teaching/0229.mp4"

# YOLOv8 nano weights
model = YOLO('yolov8n.pt')

# Capture handle used to pull frames from the video
cap = cv2.VideoCapture(video_path)

In [6]:
# Maximum number of centre points remembered per track; older points are dropped.
MAX_TRACK_POINTS = 30

# Frame-by-frame tracking loop.
# Reads frames from `cap`, runs `model.track` on each one, appends every tracked
# box centre to `track_history`, draws the recent trajectory of each track and
# shows the annotated frame. Stops at the end of the video or when 'q' is pressed.
try:
    while cap.isOpened():
        # Read a frame from the video
        success, frame = cap.read()
        if not success:
            # No frame returned: the end of the video has been reached
            break

        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)

        # Visualize the results on the frame. This is done for every frame so the
        # window keeps updating even when no track IDs are available.
        annotated_frame = results[0].plot()

        # `boxes.id` can be None (no track IDs for this frame); skip the trails then
        if results[0].boxes.id is not None:
            # Boxes as (x centre, y centre, width, height) and their track IDs
            boxes = results[0].boxes.xywh.cpu()
            track_ids = results[0].boxes.id.int().cpu().tolist()

            # Plot the tracks
            for box, track_id in zip(boxes, track_ids):
                x, y, _, _ = box  # width and height are not needed for the trail
                track = track_history[track_id]
                track.append((float(x), float(y)))  # x, y center point
                if len(track) > MAX_TRACK_POINTS:
                    # Keep only the most recent MAX_TRACK_POINTS points
                    track.pop(0)

                # Draw the tracking line: flatten the points, then reshape to the
                # (N, 1, 2) int32 layout passed to cv2.polylines
                points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
                cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10)

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video capture object and close the display window, even if the
    # loop was interrupted or raised
    cap.release()
    cv2.destroyAllWindows()


0: 192x640 12 cars, 1 stop sign, 7.2ms
Speed: 0.6ms preprocess, 7.2ms inference, 1.1ms postprocess per image at shape (1, 3, 192, 640)

0: 192x640 12 cars, 1 stop sign, 6.6ms
Speed: 0.9ms preprocess, 6.6ms inference, 0.8ms postprocess per image at shape (1, 3, 192, 640)

0: 192x640 14 cars, 1 traffic light, 1 stop sign, 6.5ms
Speed: 0.7ms preprocess, 6.5ms inference, 0.8ms postprocess per image at shape (1, 3, 192, 640)

0: 192x640 14 cars, 1 traffic light, 1 stop sign, 6.8ms
Speed: 0.7ms preprocess, 6.8ms inference, 0.7ms postprocess per image at shape (1, 3, 192, 640)

0: 192x640 13 cars, 1 stop sign, 7.4ms
Speed: 0.9ms preprocess, 7.4ms inference, 1.1ms postprocess per image at shape (1, 3, 192, 640)

0: 192x640 1 car, 8.2ms
Speed: 1.1ms preprocess, 8.2ms inference, 0.8ms postprocess per image at shape (1, 3, 192, 640)

0: 192x640 3 cars, 1 truck, 1 traffic light, 1 stop sign, 7.7ms
Speed: 0.9ms preprocess, 7.7ms inference, 1.7ms postprocess per image at shape (1, 3, 192, 640)

0: 