**Install the GPU Monitoring Library (nvidia-ml-py)**

In [10]:
!pip install nvidia-ml-py



## **Installing other Required Libraries**

In [2]:
import sys
# Run pip through the interpreter that is executing this kernel (sys.executable)
# so the packages land in the same environment the notebook imports from.
# -U upgrades any version that is already installed.
!{sys.executable} -m pip install -U ultralytics
!{sys.executable} -m pip install -U opencv-python
!{sys.executable} -m pip install -U numpy matplotlib pandas scikit-learn scikit-image scikit-video

Collecting ultralytics
  Downloading ultralytics-8.4.6-py3-none-any.whl.metadata (38 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.4.6-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.4.6 ultralytics-thop-2.0.18
Collecting opencv-python
  Downloading opencv_python-4.13.0.90-cp37-abi3-manylinux_2_28_x86_64.whl.metadata (19 kB)
Downloading opencv_python-4.13.0.90-cp37-abi3-manylinux_2_28_x86_64.whl (72.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opencv-python
  Attempting uninstall: opencv-python
    Found ex


# **YOLO Benchmark System**


---



A professional benchmarking tool for comparing YOLO model versions (5, 8, 11, 26)
on video inference with comprehensive metrics logging.
Only the highest-end (X) model variants are supported.

Author: M. Huzaifa Shahbaz (mhuzaifadev@gmail.com)
Version: 2.0.0


In [15]:


import cv2
import time
import json
import logging
import re
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, asdict
import numpy as np
from ultralytics import YOLO
import torch


@dataclass
class BenchmarkMetrics:
    """
    Result record for one benchmark run of one model on one video.

    The field order is part of the interface: the generated constructor
    accepts the values positionally in exactly this order.
    """
    # --- What was benchmarked ---
    model_version: str
    model_size: str
    video_path: str

    # --- Frame accounting ---
    total_frames: int
    processed_frames: int

    # --- Speed (time in seconds for the total, milliseconds per frame otherwise) ---
    total_inference_time: float
    avg_fps: float
    min_fps: float
    max_fps: float
    avg_inference_ms: float
    min_inference_ms: float
    max_inference_ms: float

    # --- Detection statistics ---
    total_detections: int
    avg_detections_per_frame: float
    avg_confidence: float

    # --- Misc ---
    nms_time_ms: float  # For non-YOLO26 models
    timestamp: str

    def to_dict(self) -> Dict:
        """Return all fields as a plain dictionary keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping


class YOLOBenchmark:
    """
    YOLO Benchmark System for evaluating different YOLO versions on video inference.

    Supported Models (Highest End - X variants only):
    - YOLOv5: yolov5xu.pt (Ultralytics-trained, improved performance)
    - YOLOv8: yolov8x.pt
    - YOLO11: yolo11x.pt
    - YOLO26: yolo26x.pt (end-to-end, NMS-free)

    Detection Modes:
    - Mode 1: Detect all objects
    - Mode 2: Detect all vehicles (bicycles, cars, motorcycles, buses, trains, trucks)
    - Mode 3: Detect only humans/persons (COCO class ID: 0)
    """

    # Class variable to track if logging has been initialized
    _logging_initialized = False

    MODEL_VERSIONS = {
        5: "yolov5xu",  # Use 'xu' variant for improved Ultralytics-trained model
        8: "yolov8",
        11: "yolo11",
        26: "yolo26"
    }

    # Only highest end models (x size) are supported
    # Note: YOLO5 uses 'xu' variant (Ultralytics-trained) instead of 'x'
    MODEL_SIZE = "x"  # Fixed to extra-large/highest end

    # COCO dataset class IDs for filtering
    MODE_CLASS_IDS = {
        1: None,  # All classes
        2: [1, 2, 3, 5, 6, 7],  # All vehicles: bicycle(1), car(2), motorcycle(3), bus(5), train(6), truck(7)
        3: [0]     # Person
    }

    MODE_NAMES = {
        1: "all_objects",
        2: "vehicles_only",
        3: "humans_only"
    }

    def __init__(self,
                 output_dir: str = "benchmark_results",
                 log_level: str = "INFO"):
        """
        Initialize YOLO Benchmark System

        Args:
            output_dir: Directory to save benchmark results
            log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

        # Setup logging
        self._setup_logging(log_level)

        # Initialize metrics storage
        self.results: List[BenchmarkMetrics] = []

        # Detect and configure device
        self.device, self.device_info = self._detect_device()
        self.workers = self._get_optimal_workers()

        self.logger.info("=" * 80)
        self.logger.info("YOLO BENCHMARK SYSTEM INITIALIZED")
        self.logger.info(f"Device: {self.device_info}")
        self.logger.info(f"Workers: {self.workers}")
        self.logger.info("=" * 80)

    def _setup_logging(self, log_level: str):
        """Configure logging system"""
        # Create logs directory
        log_dir = self.output_dir / "logs"
        log_dir.mkdir(exist_ok=True)

        # Create logger
        self.logger = logging.getLogger("YOLOBenchmark")
        self.logger.setLevel(getattr(logging, log_level))

        # Prevent propagation to root logger to avoid duplicate messages
        self.logger.propagate = False

        # Only setup handlers once (using class variable to track)
        # This prevents duplicate handlers when multiple YOLOBenchmark instances are created
        if not YOLOBenchmark._logging_initialized:
            # File handler
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            fh = logging.FileHandler(log_dir / f"benchmark_{timestamp}.log")
            fh.setLevel(logging.DEBUG)

            # Console handler
            ch = logging.StreamHandler()
            ch.setLevel(getattr(logging, log_level))

            # Formatter
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S'
            )
            fh.setFormatter(formatter)
            ch.setFormatter(formatter)

            # Add handlers
            self.logger.addHandler(fh)
            self.logger.addHandler(ch)

            # Mark logging as initialized
            YOLOBenchmark._logging_initialized = True

    def _detect_device(self) -> Tuple[str, str]:
        """
        Detect the best available device (CUDA > MPS > CPU)

        Returns:
            Tuple of (device_string, device_info_string)
        """
        # Check for CUDA (NVIDIA GPU)
        if torch.cuda.is_available():
            device = "cuda"
            device_count = torch.cuda.device_count()
            device_name = torch.cuda.get_device_name(0)
            device_info = f"CUDA ({device_name}, {device_count} GPU(s))"
            return device, device_info

        # Check for MPS (Apple Silicon GPU)
        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            device = "mps"
            device_info = "MPS (Apple Silicon GPU)"
            return device, device_info

        # Fallback to CPU
        device = "cpu"
        cpu_count = os.cpu_count() or 4
        device_info = f"CPU ({cpu_count} cores)"
        return device, device_info

    def _get_optimal_workers(self) -> int:
        """
        Get optimal number of workers based on device

        Returns:
            Number of workers to use
        """
        if self.device == "cuda":
            # For CUDA, use fewer workers (GPU handles parallelism)
            return min(4, os.cpu_count() or 4)
        elif self.device == "mps":
            # For MPS, use moderate workers
            return min(2, os.cpu_count() or 2)
        else:
            # For CPU, use maximum workers
            return os.cpu_count() or 4

    def _add_overlay_info(self, frame: np.ndarray, fps: float, inference_time_ms: float,
                          num_detections: int, frame_width: int, frame_height: int):
        """
        Add FPS and detections overlay to frame (left side)

        Args:
            frame: Frame to add overlay to
            fps: Current FPS value (averaged over half second)
            inference_time_ms: Inference time in milliseconds (averaged over half second)
            num_detections: Number of detections in current frame
            frame_width: Width of the frame
            frame_height: Height of the frame
        """
        # Font settings - 25% bigger (1.25x scale)
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1.25
        thickness = 2

        # FPS text with inference time in brackets
        fps_text = f"FPS: {fps:.1f} ({inference_time_ms:.1f}ms)"

        # Get text size for FPS
        (fps_w, fps_h), fps_baseline = cv2.getTextSize(fps_text, font, font_scale, thickness)

        # Position in left corner with padding
        padding = 10
        fps_x = padding
        fps_y = fps_h + padding

        # Draw black background for FPS
        cv2.rectangle(frame,
                     (fps_x - 5, fps_y - fps_h - 5),
                     (fps_x + fps_w + 5, fps_y + fps_baseline + 5),
                     (0, 0, 0), -1)  # Black background

        # Draw FPS text in white
        cv2.putText(frame, fps_text, (fps_x, fps_y), font, font_scale,
                   (255, 255, 255), thickness)  # White text

        # Detections text
        detections_text = f"Total Detections: {num_detections}"

        # Get text size for detections
        (det_w, det_h), det_baseline = cv2.getTextSize(detections_text, font, font_scale, thickness)

        # Position below FPS block
        det_x = fps_x  # Align with FPS
        det_y = fps_y + fps_h + det_h + 15  # Below FPS with spacing

        # Draw white background for detections
        cv2.rectangle(frame,
                     (det_x - 5, det_y - det_h - 5),
                     (det_x + det_w + 5, det_y + det_baseline + 5),
                     (255, 255, 255), -1)  # White background

        # Draw detections text in dark red
        cv2.putText(frame, detections_text, (det_x, det_y), font, font_scale,
                   (0, 0, 139), thickness)  # Dark red (BGR: 0, 0, 139)

    def _get_model_name(self, version: int) -> str:
        """
        Get model name from version (only highest end X models supported)

        Args:
            version: YOLO version (5, 8, 11, 26)

        Returns:
            Model name string (e.g., 'yolov5xu.pt', 'yolo26x.pt')
        """
        if version not in self.MODEL_VERSIONS:
            raise ValueError(f"Unsupported YOLO version: {version}. "
                           f"Supported versions: {list(self.MODEL_VERSIONS.keys())}")

        model_prefix = self.MODEL_VERSIONS[version]

        # YOLO5 uses 'xu' variant (Ultralytics-trained), others use 'x'
        if version == 5:
            # yolov5xu already includes the 'u', just add '.pt'
            return f"{model_prefix}.pt"
        else:
            # For other versions, append 'x' size
            return f"{model_prefix}{self.MODEL_SIZE}.pt"

    def _load_model(self, model_name: str) -> YOLO:
        """
        Load YOLO model with optimal device configuration

        Args:
            model_name: Name of the model file

        Returns:
            Loaded YOLO model configured for optimal device
        """
        self.logger.info(f"Loading model: {model_name}")
        self.logger.info(f"Using device: {self.device_info}")

        try:
            # Configure PyTorch for optimal performance before loading model
            if self.device == "cuda":
                # Enable TensorFloat-32 for faster computation on Ampere+ GPUs
                torch.backends.cudnn.benchmark = True
                torch.backends.cudnn.deterministic = False  # Allow non-deterministic for speed
                torch.backends.cuda.matmul.allow_tf32 = True
                torch.backends.cudnn.allow_tf32 = True
                # Enable memory efficient attention if available
                if hasattr(torch.backends.cuda, 'enable_flash_sdp'):
                    torch.backends.cuda.enable_flash_sdp(True)
                # Clear cache and optimize memory
                torch.cuda.empty_cache()
                # Enable memory pool for faster allocations
                if hasattr(torch.cuda, 'set_per_process_memory_fraction'):
                    torch.cuda.set_per_process_memory_fraction(0.9)  # Use 90% of GPU memory
                self.logger.info("✓ CUDA optimizations enabled (TF32, cuDNN benchmark, flash attention)")
            elif self.device == "mps":
                # MPS optimizations
                if hasattr(torch.mps, 'empty_cache'):
                    torch.mps.empty_cache()  # Clear MPS cache
                self.logger.info("✓ MPS device configured with cache optimization")
            else:
                # For CPU, set number of threads
                torch.set_num_threads(self.workers)
                torch.set_num_interop_threads(self.workers)
                self.logger.info(f"✓ CPU configured with {self.workers} threads")

            # Load model - Ultralytics YOLO will use the device automatically
            # The device is set via environment or auto-detected
            model = YOLO(model_name)

            self.logger.info(f"✓ Model loaded successfully: {model_name}")
            return model
        except Exception as e:
            self.logger.error(f"✗ Failed to load model {model_name}: {str(e)}")
            raise

    def _get_video_info(self, video_path: str) -> Tuple[int, float, int, int]:
        """
        Extract video information

        Args:
            video_path: Path to video file

        Returns:
            Tuple of (total_frames, fps, width, height)
        """
        cap = cv2.VideoCapture(video_path)

        if not cap.isOpened():
            raise ValueError(f"Cannot open video: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        cap.release()

        return total_frames, fps, width, height

    def benchmark_video(self,
                       video_path: str,
                       version: int,
                       conf_threshold: float = 0.25,
                       iou_threshold: float = 0.45,
                       max_frames: Optional[int] = None,
                       visualize: bool = False,
                       save_video: bool = False,
                       mode: int = 1) -> BenchmarkMetrics:
        """
        Benchmark YOLO model on video

        Args:
            video_path: Path to input video
            version: YOLO version (5, 8, 11, 26) - only highest end X models
            conf_threshold: Confidence threshold for detections
            iou_threshold: IOU threshold for NMS (ignored for YOLO26)
            max_frames: Maximum frames to process (None = all frames)
            visualize: Whether to display results during inference
            save_video: Whether to save annotated output video
            mode: Detection mode (1=all objects, 2=cars only, 3=humans only)

        Returns:
            BenchmarkMetrics object containing all metrics
        """
        # Validate mode
        if mode not in self.MODE_CLASS_IDS:
            raise ValueError(f"Invalid mode: {mode}. Supported modes: {list(self.MODE_CLASS_IDS.keys())}")

        # Get model name (only X size supported)
        model_name = self._get_model_name(version)

        # Get class filter for mode
        class_ids = self.MODE_CLASS_IDS[mode]
        mode_name = self.MODE_NAMES[mode]

        # Log benchmark start
        self.logger.info("")
        self.logger.info("=" * 80)
        self.logger.info(f"BENCHMARK START: {model_name}")
        self.logger.info(f"Video: {video_path}")
        self.logger.info(f"Detection Mode: {mode} ({mode_name})")
        if class_ids:
            self.logger.info(f"Filtering classes: {class_ids}")
        else:
            self.logger.info("Detecting all classes")
        self.logger.info("=" * 80)

        # Load model
        model = self._load_model(model_name)

        # Get video info
        total_frames, video_fps, width, height = self._get_video_info(video_path)
        self.logger.info(f"Video Info: {total_frames} frames @ {video_fps:.2f} FPS "
                        f"({width}x{height})")

        if max_frames:
            process_frames = min(max_frames, total_frames)
            self.logger.info(f"Processing first {process_frames} frames")
        else:
            process_frames = total_frames

        # Setup video writer if saving
        video_writer = None
        output_video_path = None
        if save_video:
            # Generate output video filename
            video_path_obj = Path(video_path)
            base_name = video_path_obj.stem  # filename without extension

            # Sanitize filename: remove/replace problematic characters
            base_name = re.sub(r'[<>:"/\\|?*]', '_', base_name)  # Replace invalid chars
            base_name = base_name[:100]  # Limit length to avoid filesystem issues

            output_video_path = self.output_dir / f"{base_name}_yolo_{version}_x_mode{mode}_{mode_name}.mp4"

            # Create video writer with better codec support
            # Try 'mp4v' first, fallback to 'XVID' if needed
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            video_writer = cv2.VideoWriter(
                str(output_video_path),
                fourcc,
                video_fps,
                (width, height)
            )

            # Verify video writer was created successfully
            if not video_writer.isOpened():
                self.logger.warning(f"Failed to initialize video writer with 'mp4v', trying 'XVID'...")
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                video_writer = cv2.VideoWriter(
                    str(output_video_path),
                    fourcc,
                    video_fps,
                    (width, height)
                )

                if not video_writer.isOpened():
                    self.logger.error(f"Failed to initialize video writer. Video saving disabled.")
                    video_writer = None
                    save_video = False
                else:
                    self.logger.info(f"✓ Video writer initialized with XVID codec")

            if video_writer and video_writer.isOpened():
                self.logger.info(f"Saving output video to: {output_video_path}")

        # Initialize metrics tracking
        inference_times = []
        fps_values = []
        detection_counts = []
        confidence_scores = []

        # Calculate update interval for half-second updates (video_fps / 2)
        update_interval = max(1, int(video_fps / 2))
        self.logger.info(f"FPS/ms update interval: every {update_interval} frames (half second)")

        # Variables for half-second averaging
        half_second_fps_values = []
        half_second_ms_values = []
        frames_since_update = 0
        displayed_fps = 0.0
        displayed_ms = 0.0

        # Open video
        cap = cv2.VideoCapture(video_path)
        frame_count = 0

        try:
            while cap.isOpened() and frame_count < process_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # Record start time
                start_time = time.time()

                # Run inference with class filtering based on mode
                # Optimized for maximum GPU performance
                inference_kwargs = {
                    'conf': conf_threshold,
                    'iou': iou_threshold if version != 26 else 0.0,
                    'verbose': False,
                    'device': self.device  # Explicitly set device for inference
                }

                # Enable half precision (FP16) for CUDA/MPS for faster inference
                if self.device in ["cuda", "mps"]:
                    inference_kwargs['half'] = True  # Use FP16 for 2x speed boost on modern GPUs

                # Add class filtering if mode is not "all objects"
                if class_ids is not None:
                    inference_kwargs['classes'] = class_ids

                results = model(frame, **inference_kwargs)

                # Record end time
                end_time = time.time()
                inference_time = (end_time - start_time) * 1000  # Convert to ms

                # Calculate FPS
                fps = 1000 / inference_time if inference_time > 0 else 0

                # Extract detection info
                detections = results[0].boxes
                num_detections = len(detections)

                if num_detections > 0:
                    confidences = detections.conf.cpu().numpy()
                    confidence_scores.extend(confidences.tolist())

                # Store metrics
                inference_times.append(inference_time)
                fps_values.append(fps)
                detection_counts.append(num_detections)

                # Accumulate values for half-second averaging
                half_second_fps_values.append(fps)
                half_second_ms_values.append(inference_time)
                frames_since_update += 1

                # Update displayed values every half second
                if frames_since_update >= update_interval:
                    displayed_fps = np.mean(half_second_fps_values)
                    displayed_ms = np.mean(half_second_ms_values)
                    # Reset accumulators
                    half_second_fps_values = []
                    half_second_ms_values = []
                    frames_since_update = 0
                elif frame_count == 0:
                    # Initialize with first frame values
                    displayed_fps = fps
                    displayed_ms = inference_time

                # Get annotated frame for visualization/saving
                annotated_frame = results[0].plot()

                # Ensure frame dimensions match video writer
                if save_video and video_writer is not None:
                    # Resize if dimensions don't match (shouldn't happen, but safety check)
                    if annotated_frame.shape[:2] != (height, width):
                        annotated_frame = cv2.resize(annotated_frame, (width, height))

                # Add FPS and detections overlay (left side)
                # Use displayed values (averaged) for FPS/ms, current value for detections
                self._add_overlay_info(annotated_frame, displayed_fps, displayed_ms, num_detections, width, height)

                # Save frame to video if enabled
                if save_video and video_writer is not None:
                    video_writer.write(annotated_frame)

                # Visualization
                if visualize:
                    cv2.imshow(f"Benchmark: {model_name}", annotated_frame)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        self.logger.info("Benchmark interrupted by user")
                        break

                frame_count += 1

                # Log progress every 100 frames
                if frame_count % 100 == 0:
                    avg_fps_so_far = np.mean(fps_values)
                    self.logger.info(f"Processed {frame_count}/{process_frames} frames "
                                   f"(Avg FPS: {avg_fps_so_far:.2f})")

        finally:
            cap.release()
            if video_writer is not None:
                video_writer.release()
                if output_video_path:
                    self.logger.info(f"✓ Output video saved: {output_video_path}")
            if visualize:
                cv2.destroyAllWindows()

        # Calculate final metrics
        metrics = BenchmarkMetrics(
            model_version=f"YOLO{version}",
            model_size=self.MODEL_SIZE,
            video_path=video_path,
            total_frames=total_frames,
            processed_frames=frame_count,
            total_inference_time=sum(inference_times) / 1000,  # Convert to seconds
            avg_fps=np.mean(fps_values) if fps_values else 0,
            min_fps=np.min(fps_values) if fps_values else 0,
            max_fps=np.max(fps_values) if fps_values else 0,
            avg_inference_ms=np.mean(inference_times) if inference_times else 0,
            min_inference_ms=np.min(inference_times) if inference_times else 0,
            max_inference_ms=np.max(inference_times) if inference_times else 0,
            total_detections=sum(detection_counts),
            avg_detections_per_frame=np.mean(detection_counts) if detection_counts else 0,
            avg_confidence=np.mean(confidence_scores) if confidence_scores else 0,
            nms_time_ms=0.0 if version == 26 else np.mean(inference_times) * 0.1,  # Estimate
            timestamp=datetime.now().isoformat()
        )

        # Store results
        self.results.append(metrics)

        # Log summary
        self._log_metrics_summary(metrics)

        return metrics

    def _log_metrics_summary(self, metrics: BenchmarkMetrics):
        """Log benchmark metrics summary"""
        self.logger.info("")
        self.logger.info("=" * 80)
        self.logger.info("BENCHMARK RESULTS SUMMARY")
        self.logger.info("=" * 80)
        self.logger.info(f"Model: {metrics.model_version}{metrics.model_size}")
        self.logger.info(f"Processed: {metrics.processed_frames}/{metrics.total_frames} frames")
        self.logger.info("")
        self.logger.info("Performance Metrics:")
        self.logger.info(f"  Average FPS: {metrics.avg_fps:.2f}")
        self.logger.info(f"  Min FPS: {metrics.min_fps:.2f}")
        self.logger.info(f"  Max FPS: {metrics.max_fps:.2f}")
        self.logger.info(f"  Avg Inference Time: {metrics.avg_inference_ms:.2f} ms")
        self.logger.info(f"  Min Inference Time: {metrics.min_inference_ms:.2f} ms")
        self.logger.info(f"  Max Inference Time: {metrics.max_inference_ms:.2f} ms")
        self.logger.info("")
        self.logger.info("Detection Metrics:")
        self.logger.info(f"  Total Detections: {metrics.total_detections}")
        self.logger.info(f"  Avg Detections/Frame: {metrics.avg_detections_per_frame:.2f}")
        self.logger.info(f"  Avg Confidence: {metrics.avg_confidence:.3f}")

        if metrics.model_version != "YOLO26":
            self.logger.info(f"  Est. NMS Time: {metrics.nms_time_ms:.2f} ms")
        else:
            self.logger.info("  NMS: Not Applicable (End-to-End)")

        self.logger.info("=" * 80)

    def save_results(self, filename: Optional[str] = None):
        """
        Save benchmark results to JSON file

        Args:
            filename: Output filename (auto-generated if None)
        """
        if not self.results:
            self.logger.warning("No results to save")
            return

        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"benchmark_results_{timestamp}.json"

        output_path = self.output_dir / filename

        # Convert results to dict
        results_dict = {
            "benchmark_info": {
                "total_runs": len(self.results),
                "timestamp": datetime.now().isoformat()
            },
            "results": [m.to_dict() for m in self.results]
        }

        # Save to JSON
        with open(output_path, 'w') as f:
            json.dump(results_dict, f, indent=2)

        self.logger.info(f"Results saved to: {output_path}")

    def compare_models(self,
                      video_path: str,
                      versions: List[int],
                      **kwargs) -> List[BenchmarkMetrics]:
        """
        Compare multiple YOLO models on the same video (only highest end X models)

        Args:
            video_path: Path to input video
            versions: List of YOLO versions to compare (5, 8, 11, 26)
            **kwargs: Additional arguments for benchmark_video

        Returns:
            List of BenchmarkMetrics for all models
        """
        # Validate all versions are supported
        for version in versions:
            if version not in self.MODEL_VERSIONS:
                raise ValueError(f"Unsupported YOLO version: {version}. "
                               f"Supported: {list(self.MODEL_VERSIONS.keys())}")

        results = []

        for version in versions:
            self.logger.info("")
            self.logger.info("*" * 80)
            self.logger.info(f"COMPARING: YOLO{version}x (highest end model)")
            self.logger.info("*" * 80)

            metrics = self.benchmark_video(video_path, version, **kwargs)
            results.append(metrics)

        # Log comparison
        self._log_comparison(results)

        return results

    def _log_comparison(self, results: List[BenchmarkMetrics]):
        """Log comparison of multiple benchmark results"""
        self.logger.info("")
        self.logger.info("=" * 80)
        self.logger.info("MODEL COMPARISON")
        self.logger.info("=" * 80)

        for i, metrics in enumerate(results, 1):
            self.logger.info(f"\n{i}. {metrics.model_version}{metrics.model_size}:")
            self.logger.info(f"   Avg FPS: {metrics.avg_fps:.2f}")
            self.logger.info(f"   Avg Inference: {metrics.avg_inference_ms:.2f} ms")
            self.logger.info(f"   Avg Detections: {metrics.avg_detections_per_frame:.2f}")
            self.logger.info(f"   Avg Confidence: {metrics.avg_confidence:.3f}")

        # Find best performer
        best_fps = max(results, key=lambda x: x.avg_fps)
        best_inference = min(results, key=lambda x: x.avg_inference_ms)

        self.logger.info("")
        self.logger.info("Best Performers:")
        self.logger.info(f"  Highest FPS: {best_fps.model_version}{best_fps.model_size} "
                        f"({best_fps.avg_fps:.2f} FPS)")
        self.logger.info(f"  Fastest Inference: {best_inference.model_version}"
                        f"{best_inference.model_size} "
                        f"({best_inference.avg_inference_ms:.2f} ms)")
        self.logger.info("=" * 80)


In [17]:
from typing import Union, List, Optional
import logging
import torch


# Optional GPU utilization monitoring through NVML
# Requires: pip install nvidia-ml-py
# Note: Package name is 'nvidia-ml-py' but import is 'pynvml'
try:
    import pynvml  # type: ignore
    pynvml.nvmlInit()
except Exception:
    # Covers both a missing package (ImportError) and a failing NVML
    # initialization; monitoring is simply switched off in either case.
    pynvml = None  # type: ignore
    GPU_MONITORING_AVAILABLE = False
else:
    GPU_MONITORING_AVAILABLE = True


def get_gpu_utilization(device_index: int = 0) -> str:
    """
    Get GPU utilization percentage for NVIDIA GPUs

    Args:
        device_index: NVML index of the GPU to query. Defaults to 0, the
            device that was previously hard-coded.

    Returns:
        String of the form " [GPU: N%]" (with a leading space so it can be
        appended to a log line), or an empty string if monitoring is not
        available or the query fails.
    """
    if not GPU_MONITORING_AVAILABLE:
        return ""

    if not torch.cuda.is_available():
        return ""

    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
        utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
        return f" [GPU: {utilization.gpu}%]"
    except Exception:
        # Deliberately best-effort: GPU monitoring is optional and must never
        # break the logging call that triggered it.
        return ""


class GPUUtilizationFormatter(logging.Formatter):
    """Formatter that suffixes every formatted record with the current GPU utilization."""

    def format(self, record):
        """Format ``record`` as usual, then append the text returned by get_gpu_utilization()."""
        # The base message is produced first; the GPU suffix may be an empty string
        return f"{super().format(record)}{get_gpu_utilization()}"


def setup_gpu_logging(benchmark: YOLOBenchmark):
    """
    Make the benchmark's console log lines end with the GPU utilization.

    Every plain stream handler attached to ``benchmark.logger`` gets a
    GPUUtilizationFormatter; file handlers keep their existing formatter.
    The outcome is reported with print(). Nothing is changed when NVML
    monitoring or CUDA is unavailable.

    Args:
        benchmark: YOLOBenchmark instance
    """
    if not GPU_MONITORING_AVAILABLE:
        print("GPU monitoring not available (install: pip install nvidia-ml-py)")
        return

    if not torch.cuda.is_available():
        print("CUDA not available, GPU monitoring disabled")
        return

    # One formatter instance is shared by all console handlers
    gpu_formatter = GPUUtilizationFormatter(
        fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # FileHandler subclasses StreamHandler, so it has to be excluded explicitly
    console_handlers = [
        h for h in benchmark.logger.handlers
        if isinstance(h, logging.StreamHandler)
        and not isinstance(h, logging.FileHandler)
    ]
    for console_handler in console_handlers:
        console_handler.setFormatter(gpu_formatter)

    if console_handlers:
        print("✓ GPU utilization monitoring enabled - GPU % will appear after each log line")
    else:
        print("⚠ Warning: No console handlers found to update with GPU formatter")


def benchmark_yolo(
    video_path: str,
    version: Union[int, List[int]] = 26,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
    max_frames: Optional[int] = None,
    visualize: bool = False,
    save_video: bool = True,
    mode: int = 1,
    output_dir: str = "benchmark_results",
    save_results: bool = True,
    log_level: str = "INFO"
) -> Union[dict, List[dict]]:
    """
    Driver function to benchmark YOLO models on video inference.

    This function provides a simple interface to run comprehensive benchmarks
    on YOLO models (versions 5, 8, 11, 26) with automatic metrics collection
    and logging. Only highest end X models are supported.

    Parameters:
    ----------
    video_path : str
        Path to the input video file

    version : int or List[int], default=26
        YOLO version(s) to benchmark (only highest end X models):
        - Single version: 5, 8, 11, or 26
        - Multiple versions: [5, 8, 11, 26] for comparison
          (a tuple such as (5, 8) is accepted too and treated like a list)
        Default uses YOLO26x (highest end model)

    conf_threshold : float, default=0.25
        Confidence threshold for detections (0.0 - 1.0)
        Lower = more detections, higher = more confident detections

    iou_threshold : float, default=0.45
        IOU threshold for NMS (ignored for YOLO26 which is NMS-free)

    max_frames : int or None, default=None
        Maximum number of frames to process
        - None: Process entire video
        - int: Process only first N frames (useful for quick tests)

    visualize : bool, default=False
        Whether to display real-time visualization during inference
        Press 'q' to stop visualization early

    save_video : bool, default=True
        Whether to save annotated output videos for each model
        Videos are saved with naming: video_name_yolo_{version}_x_mode{mode}_{mode_name}.mp4

    mode : int, default=1
        Detection mode:
        - 1: Detect all objects
        - 2: Detect all vehicles (bicycles, cars, motorcycles, buses, trains, trucks)
        - 3: Detect only humans/persons (COCO class ID: 0)

    output_dir : str, default="benchmark_results"
        Directory to save benchmark results, logs, and output videos

    save_results : bool, default=True
        Whether to save results to JSON file

    log_level : str, default="INFO"
        Logging verbosity: "DEBUG", "INFO", "WARNING", "ERROR"

    Returns:
    -------
    dict or List[dict]
        A single dict when `version` is an int, a list of dicts (one per
        model) when `version` is a list/tuple. Each dict contains:
        - model_version: YOLO version
        - model_size: Model size variant
        - video_path: Input video path
        - total_frames: Total frames in video
        - processed_frames: Frames actually processed
        - total_inference_time: Total time spent on inference (seconds)
        - avg_fps: Average frames per second
        - min_fps, max_fps: FPS range
        - avg_inference_ms: Average inference time per frame (milliseconds)
        - min_inference_ms, max_inference_ms: Inference time range
        - total_detections: Total objects detected
        - avg_detections_per_frame: Average detections per frame
        - avg_confidence: Average detection confidence
        - nms_time_ms: Estimated NMS time (0 for YOLO26)
        - timestamp: Benchmark timestamp

    Examples:
    --------
    # Example 1: Quick benchmark of YOLO26x (highest end model)
    >>> results = benchmark_yolo("traffic.mp4")

    # Example 2: Compare all YOLO versions (highest end X models)
    >>> results = benchmark_yolo(
    ...     video_path="traffic.mp4",
    ...     version=[5, 8, 11, 26]
    ... )

    # Example 3: Test YOLO26x with visualization and detect all vehicles
    >>> results = benchmark_yolo(
    ...     video_path="traffic.mp4",
    ...     version=26,
    ...     mode=2,
    ...     visualize=True
    ... )

    # Example 4: Quick test on first 100 frames, detect only humans
    >>> results = benchmark_yolo(
    ...     video_path="traffic.mp4",
    ...     version=26,
    ...     mode=3,
    ...     max_frames=100
    ... )

    # Example 5: Compare all versions detecting all vehicles
    >>> results = benchmark_yolo(
    ...     video_path="traffic.mp4",
    ...     version=[5, 8, 11, 26],
    ...     mode=2
    ... )

    Notes:
    -----
    - Only highest end X models are supported (yolov5xu, yolov8x, yolo11x, yolo26x)
    - YOLO5 uses 'xu' variant (Ultralytics-trained) for improved performance
    - YOLO26 is end-to-end and NMS-free, making it faster on CPUs
    - Mode 1 detects all COCO classes (80 classes)
    - Mode 2 filters to detect all vehicles (bicycles, cars, motorcycles, buses, trains, trucks)
    - Mode 3 filters to detect only persons/humans (class ID: 0)
    - Results are automatically saved to JSON in output_dir
    - Logs are saved to output_dir/logs/
    """

    # Initialize benchmark system
    benchmark = YOLOBenchmark(output_dir=output_dir, log_level=log_level)

    # Switch console handlers to the formatter that appends GPU utilization
    setup_gpu_logging(benchmark)

    # Tell the user whether GPU monitoring is actually working
    cuda_available = torch.cuda.is_available()
    if GPU_MONITORING_AVAILABLE and cuda_available:
        if get_gpu_utilization():
            # The reading is NOT embedded in the message: the console
            # formatter installed above already appends it, and embedding it
            # produced lines ending in "[GPU: 0%] [GPU: 0%]".
            benchmark.logger.info("GPU utilization monitoring enabled")
        else:
            benchmark.logger.info("GPU monitoring available but unable to read GPU stats")
    elif cuda_available:
        benchmark.logger.info("GPU utilization monitoring unavailable (install: pip install nvidia-ml-py)")
    else:
        benchmark.logger.info("CUDA not available - running on CPU")

    # Options shared by the single-model and the comparison code paths
    run_options = dict(
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
        max_frames=max_frames,
        visualize=visualize,
        save_video=save_video,
        mode=mode,
    )

    # A list OR tuple of versions selects comparison mode; previously a tuple
    # fell through to the single-model path and was passed on as one "version"
    if isinstance(version, (list, tuple)):
        versions = list(version)

        benchmark.logger.info("=" * 80)
        benchmark.logger.info("MULTI-MODEL COMPARISON MODE")
        benchmark.logger.info(f"Comparing {len(versions)} model(s): {versions}")
        benchmark.logger.info("=" * 80)

        results = benchmark.compare_models(
            video_path=video_path,
            versions=versions,
            **run_options
        )

        # Convert to dict list
        results_dict = [r.to_dict() for r in results]

    else:
        # Single model benchmark
        benchmark.logger.info("=" * 80)
        benchmark.logger.info("SINGLE MODEL BENCHMARK MODE")
        benchmark.logger.info(f"Model: YOLO{version}x (highest end)")
        benchmark.logger.info("=" * 80)

        result = benchmark.benchmark_video(
            video_path=video_path,
            version=version,
            **run_options
        )

        results_dict = result.to_dict()

    # Save results if requested
    if save_results:
        benchmark.save_results()

    return results_dict




if __name__ == "__main__":
    # Demo run: print the banner, then benchmark every supported YOLO version
    print("""
    ╔══════════════════════════════════════════════════════════════════════════╗
    ║                        YOLO BENCHMARK SYSTEM                             ║
    ║                                                                          ║
    ║  Professional benchmarking tool for YOLO models (v5, v8, v11, v26)       ║
    ║  Only highest end X models supported                                     ║
    ║                                                                          ║
    ╚══════════════════════════════════════════════════════════════════════════╝
    """)

    # NOTE(review): both paths are absolute and look like Colab / Google Drive
    # locations - adjust them when running elsewhere.
    results = benchmark_yolo(
        video_path="/content/uk.mp4",
        version=[5, 8, 11, 26],
        output_dir="/content/drive/MyDrive/yolo-tests/benchmark_results",
        mode=1,  # mode 1 = detect all objects
    )


2026-01-20 10:54:39 - YOLOBenchmark - INFO - YOLO BENCHMARK SYSTEM INITIALIZED
2026-01-20 10:54:39 - YOLOBenchmark - INFO - YOLO BENCHMARK SYSTEM INITIALIZED
2026-01-20 10:54:39 - YOLOBenchmark - INFO - YOLO BENCHMARK SYSTEM INITIALIZED
2026-01-20 10:54:39 - YOLOBenchmark - INFO - YOLO BENCHMARK SYSTEM INITIALIZED
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Device: CUDA (Tesla T4, 1 GPU(s))
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Device: CUDA (Tesla T4, 1 GPU(s))
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Device: CUDA (Tesla T4, 1 GPU(s))
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Device: CUDA (Tesla T4, 1 GPU(s))
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Workers: 2
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Workers: 2
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Workers: 2
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Workers: 2
2026-01-20 10:54:39 - YOLOBenchmark - INFO - GPU utilization monitoring enabled [GPU: 0%] [GPU: 0%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO -


    ╔══════════════════════════════════════════════════════════════════════════╗
    ║                        YOLO BENCHMARK SYSTEM                             ║
    ║                                                                          ║
    ║  Professional benchmarking tool for YOLO models (v5, v8, v11, v26)       ║
    ║  Only highest end X models supported                                     ║
    ║                                                                          ║
    ║  Usage:                                                                  ║
    ║    python yolo_benchmark_driver.py --video <path> [options]              ║
    ║                                                                          ║
    ║  For help:                                                               ║
    ║    python driver.py --help                                               ║
    ╚══════════════════════════════════════════════════════════════════════════╝
    
✓ GPU utilization moni

2026-01-20 10:54:39 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolov5xu.pt [GPU: 3%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolov5xu.pt [GPU: 3%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolov5xu.pt [GPU: 3%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolov5xu.pt [GPU: 3%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 10:54:39 - YOLOBenchmark - INFO - Saving output video to: /content/drive/MyDrive/yolo-tests/benchmark_results/uk_yolo_5_x_mode1_all_objects.mp4 [GPU: 0%]
2026-01-20 10:54:39 - YOLOBenchmark

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo11x.pt to 'yolo11x.pt': 100% ━━━━━━━━━━━━ 109.3MB 103.6MB/s 1.1s


2026-01-20 11:52:55 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo11x.pt [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo11x.pt [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo11x.pt [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo11x.pt [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - INFO - Saving output video to: /content/drive/MyDrive/yolo-tests/benchmark_results/uk_yolo_11_x_mode1_all_objects.mp4 [GPU: 0%]
2026-01-20 11:52:55 - YOLOBenchmark - 

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26x.pt to 'yolo26x.pt': 100% ━━━━━━━━━━━━ 113.2MB 124.5MB/s 0.9s


2026-01-20 12:22:08 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo26x.pt [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo26x.pt [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo26x.pt [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - ✓ Model loaded successfully: yolo26x.pt [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - Video Info: 10800 frames @ 60.00 FPS (1920x1080) [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - INFO - Saving output video to: /content/drive/MyDrive/yolo-tests/benchmark_results/uk_yolo_26_x_mode1_all_objects.mp4 [GPU: 0%]
2026-01-20 12:22:08 - YOLOBenchmark - 