In [1]:
import cv2
from ultralytics import YOLO
import supervision as sv
import numpy as np
import time

In [2]:
# Video clip that every model in the comparison is run against.
VIDEO_PATH = "road.mp4"

# Weights to compare; the trailing "n" in each name marks the nano version.
MODELS_TO_TEST = ["yolov8n.pt", "yolo11n.pt", "yolo12n.pt"]

In [3]:
import cv2
from ultralytics import YOLO
import supervision as sv
import numpy as np
import time

# Input video shared by all benchmark runs.
VIDEO_PATH = "road.mp4"

# Model weight files passed, one at a time, to the detection benchmark.
MODELS_TO_TEST = [
    "yolov8n.pt",
    "yolo11n.pt",
    "yolo12n.pt",
]

def objectDetection(video_path: str, model_name: str) -> dict:
    """Run one YOLO model over a video, write an annotated copy, and time it.

    Each frame of ``video_path`` is passed to the model, the detections are
    drawn onto a copy of the frame (boxes plus ``"<class> <confidence>"``
    labels), and the annotated frame is appended to an MP4 file created in
    the same directory as the input, named
    ``<video stem>_<model stem>_output.mp4``.

    Args:
        video_path: Path of the video file to process.
        model_name: Weights file name or path handed to ``YOLO``.

    Returns:
        A dict with the keys:

        * ``"model"`` - the ``model_name`` argument, unchanged.
        * ``"avg_inference_time_ms"`` - mean duration of the model call per
          frame, in milliseconds (0 when the video yields no frames).
        * ``"fps"`` - ``1000 / avg_inference_time_ms``. Only the model call
          is timed, so frame decoding, annotation and video writing are not
          included in this figure (0 when no time was measured).
        * ``"total_detections"`` - number of detections summed over frames.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from pathlib import Path

    print(f"\n---model: {model_name} ---")

    model = YOLO(model_name)

    video_info = sv.VideoInfo.from_video_path(video_path)
    frame_gen = sv.get_video_frames_generator(source_path=video_path)

    # Annotator for the bounding boxes
    box_annotator = sv.BoxAnnotator(
        thickness=2
    )
    # Annotator for the text labels drawn next to each box
    label_annotator = sv.LabelAnnotator(
        text_thickness=1,
        text_scale=0.5
    )

    # Build the output name from path stems rather than ``split('.')``:
    # splitting on the first dot breaks on inputs such as "./clips/road.mp4"
    # or "my.clip.mp4", and a model given as a path would otherwise inject a
    # directory separator into the file name.
    source = Path(video_path)
    output_filename = str(
        source.with_name(f"{source.stem}_{Path(model_name).stem}_output.mp4")
    )
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_filename, fourcc, video_info.fps, video_info.resolution_wh)

    total_inference_time_ms = 0.0
    frame_count = 0
    total_detections = 0

    try:
        for frame in frame_gen:
            frame_count += 1

            # perf_counter is monotonic and high resolution, so the interval
            # cannot be distorted by system clock adjustments.
            start_time = time.perf_counter()
            result = model(frame, verbose=False)[0]
            end_time = time.perf_counter()

            total_inference_time_ms += (end_time - start_time) * 1000

            detections = sv.Detections.from_ultralytics(result)
            total_detections += len(detections)

            labels = [
                f"{model.names[int(class_id)]} {confidence:.2f}"
                for confidence, class_id
                in zip(detections.confidence, detections.class_id)
            ]

            # Draw on a copy so the decoded frame itself is left untouched.
            annotated_frame = box_annotator.annotate(
                scene=frame.copy(),
                detections=detections
            )
            annotated_frame = label_annotator.annotate(
                scene=annotated_frame,
                detections=detections,
                labels=labels
            )

            out.write(annotated_frame)
    finally:
        # Release the writer even if inference or annotation raises, so the
        # output file handle is never leaked.
        out.release()

    print(f"Processed {frame_count} frames. Output saved to: {output_filename}")

    # Guard both divisions so an empty video yields zeros instead of raising.
    avg_inference_time_ms = total_inference_time_ms / frame_count if frame_count > 0 else 0
    fps = 1000 / avg_inference_time_ms if avg_inference_time_ms > 0 else 0

    return {
        "model": model_name,
        "avg_inference_time_ms": avg_inference_time_ms,
        "fps": fps,
        "total_detections": total_detections
    }


if __name__ == "__main__":
    # Benchmark every configured model against the same video and keep the
    # summary dict returned for each run.
    benchmark_results = []
    for model_name in MODELS_TO_TEST:
        result = objectDetection(VIDEO_PATH, model_name)
        if not result:
            continue
        benchmark_results.append(result)

    heavy_rule = "=" * 80
    light_rule = "-" * 80
    header = f"{'Model':<15} | {'Avg Inference (ms)':<20} | {'FPS':<10} | {'Total Detections':<20}"

    print("\n\n--- FINAL BENCHMARK SUMMARY ---")
    print(heavy_rule)
    print(header)
    print(light_rule)

    # Rank by FPS, fastest model first, without reordering benchmark_results.
    ranked = sorted(benchmark_results, key=lambda entry: entry['fps'], reverse=True)
    for result in ranked:
        cells = [
            f"{result['model']:<15}",
            f"{result['avg_inference_time_ms']:<20.2f}",
            f"{result['fps']:<10.2f}",
            f"{result['total_detections']:<20}",
        ]
        print(" | ".join(cells))
    print(heavy_rule)


---model: yolov8n.pt ---
Processed 255 frames. Output saved to: road_yolov8n_output.mp4

---model: yolo11n.pt ---
Processed 255 frames. Output saved to: road_yolo11n_output.mp4

---model: yolo12n.pt ---
Processed 255 frames. Output saved to: road_yolo12n_output.mp4


--- FINAL BENCHMARK SUMMARY ---
Model           | Avg Inference (ms)   | FPS        | Total Detections    
--------------------------------------------------------------------------------
yolov8n.pt      | 68.09                | 14.69      | 5098                
yolo11n.pt      | 70.71                | 14.14      | 4826                
yolo12n.pt      | 95.38                | 10.48      | 4932                
