<br><br>

In [1]:
from ultralytics import YOLO  
import cv2
import time
import torch 
import subprocess
import shutil
import os
# Environment sanity check: show the installed torch version string and
# whether torch reports a usable CUDA device, one value per line.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.2.0+cu121
True


<br><br>

In [2]:
# Load the YOLO checkpoint 'yolov8n.pt' into a notebook-level `model`.
model = YOLO('yolov8n.pt')
# Bare last expression so the notebook displays the mapping of class index
# to class name (e.g. 0 -> 'person', 4 -> 'airplane').
model.names

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [5]:
def main(tracker_choice='bytetrack', input_path='videos/plane_video.mp4', conf_threshold=0.3):
    """Track objects in a video with YOLOv8 and preview the annotated frames.

    Every frame yielded by ``model.track`` is copied, each detection is drawn
    with its class name, track ID and confidence, and the tracker name plus the
    measured per-frame processing rate are overlaid in the top-left corner.
    Press ``q`` in the preview window to stop early.

    Args:
        tracker_choice: 'bytetrack' or 'botsort' (the stem of the tracker YAML
            passed to ``model.track``). Any other value prints a warning and
            falls back to 'bytetrack'.
        input_path: Path of the video file to process.
        conf_threshold: Confidence threshold forwarded to ``model.track``.

    Returns:
        None. Returns early, after printing an error, when the video cannot
        be opened.
    """
    # Validate tracker choice
    if tracker_choice not in ('botsort', 'bytetrack'):
        print(f"Invalid tracker choice '{tracker_choice}', defaulting to 'bytetrack'")
        tracker_choice = 'bytetrack'

    # Probe the video before loading the model so a bad path fails fast.
    # The capture is only used for metadata (frames come from model.track,
    # which is given the path directly), so release it right away instead of
    # holding the file open for the whole run.
    cap = cv2.VideoCapture(input_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file {input_path}")
            return
        input_fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()
    if not input_fps or input_fps <= 0:
        # Missing/invalid FPS metadata: fall back to a nominal rate.
        input_fps = 30.0

    # Load the pretrained YOLOv8 model
    model = YOLO('yolov8n.pt')

    # Check if CUDA is available and set device accordingly
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    print(f"Using device: {device}")

    # model.names maps class indices to class names, e.g. {0: 'person', 1: 'bicycle', ...}
    class_names = model.names

    # Parameters of the track method:
    #   source:  path to the video file (or a camera index)
    #   tracker: tracker configuration file ('bytetrack.yaml' or 'botsort.yaml')
    #   conf:    confidence threshold for detections
    #   stream:  True yields results frame by frame instead of processing the
    #            whole video at once
    results = model.track(
        source=input_path,
        tracker=f'{tracker_choice}.yaml',
        conf=conf_threshold,
        stream=True
    )

    # FPS bookkeeping: show the file's FPS until a real interval is measured.
    prev_time = None
    fps = input_fps

    try:
        for frame_result in results:
            # Work on a copy so the original frame stays untouched
            img = frame_result.orig_img.copy()

            # perf_counter is monotonic and high resolution. Two reads can
            # still be equal, so keep the previous FPS value rather than
            # dividing by zero.
            curr_time = time.perf_counter()
            if prev_time is not None:
                elapsed = curr_time - prev_time
                if elapsed > 0:
                    fps = 1.0 / elapsed
            prev_time = curr_time

            # Draw every detected box with its label
            for box in frame_result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy.cpu().numpy()[0])
                score = box.conf.cpu().item()
                cls = int(box.cls.cpu().item())
                # box.id is None when the detection has no track assigned
                track_id = int(box.id.cpu().item()) if box.id is not None else -1
                class_name = class_names[cls]

                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{class_name} ID:{track_id} {score:.2f}"
                # Keep the label inside the image for boxes touching the top edge
                label_y = max(y1 - 10, 15)
                cv2.putText(img, label, (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Display tracker name and FPS on top-left corner
            cv2.putText(img, f"Tracker: {tracker_choice}", (10, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
            cv2.putText(img, f"FPS: {fps:.2f}", (10, 150),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)

            # Display the frame with detections
            cv2.imshow('YOLOv8 Tracking', img)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Close the preview window even if tracking raised midway
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # Two trackers are available, each selected by the stem of its YAML config:
    #   1. ByteTrack -> 'bytetrack' (bytetrack.yaml)
    #   2. BoT-SORT  -> 'botsort'   (botsort.yaml)
    main(tracker_choice='bytetrack')


Using device: cuda

video 1/1 (frame 1/371) c:\Users\sirom\Desktop\repos\OpenCV-Projects-cpp-python\ObjectTracking-yolo-byttetrack-botsort\videos\plane_video.mp4: 384x640 1 airplane, 50.5ms
video 1/1 (frame 2/371) c:\Users\sirom\Desktop\repos\OpenCV-Projects-cpp-python\ObjectTracking-yolo-byttetrack-botsort\videos\plane_video.mp4: 384x640 1 airplane, 33.4ms
video 1/1 (frame 3/371) c:\Users\sirom\Desktop\repos\OpenCV-Projects-cpp-python\ObjectTracking-yolo-byttetrack-botsort\videos\plane_video.mp4: 384x640 1 airplane, 36.9ms
video 1/1 (frame 4/371) c:\Users\sirom\Desktop\repos\OpenCV-Projects-cpp-python\ObjectTracking-yolo-byttetrack-botsort\videos\plane_video.mp4: 384x640 1 airplane, 41.3ms
video 1/1 (frame 5/371) c:\Users\sirom\Desktop\repos\OpenCV-Projects-cpp-python\ObjectTracking-yolo-byttetrack-botsort\videos\plane_video.mp4: 384x640 1 airplane, 33.1ms
video 1/1 (frame 6/371) c:\Users\sirom\Desktop\repos\OpenCV-Projects-cpp-python\ObjectTracking-yolo-byttetrack-botsort\videos\plan