# YOLO26 vs YOLO11 Benchmark on VisDrone Dataset

This notebook provides a comprehensive benchmark comparing **YOLO26** and **YOLO11** on the VisDrone dataset for small object detection.

## Goals
1. Verify the claim that YOLO26 delivers **43% faster CPU inference**
2. Compare **small object detection** performance (ProgLoss + STAL features)
3. Evaluate **NMS-free end-to-end inference** benefits

## Author
**Murat Raimbekov** - [GitHub](https://github.com/raimbekovm) | [HuggingFace](https://huggingface.co/raimbekovm)

---

## 1. Setup Environment

In [None]:
# Install required packages
!pip install -q ultralytics>=8.3.0 pycocotools>=2.0.7 huggingface_hub pandas matplotlib seaborn

In [None]:
import os
import time
import json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from ultralytics import YOLO

# Report the framework version and whether a CUDA device is usable.
cuda_ok = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    # Name of the first visible GPU (index 0).
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Settings shared by the training and validation calls for both models.
CONFIG = dict(
    epochs=100,
    batch=16,
    imgsz=640,
    # GPU index 0 when CUDA is usable, otherwise fall back to the CPU.
    device=0 if torch.cuda.is_available() else "cpu",
    project="runs/detect",
    data="VisDrone.yaml",
)

print("Training Configuration:")
print("\n".join(f"  {key}: {value}" for key, value in CONFIG.items()))

## 2. Download VisDrone Dataset

VisDrone is automatically downloaded by Ultralytics when training with `VisDrone.yaml`.

In [None]:
# This cell only prints a notice and defines the class names; it does not
# download or verify anything itself.
print("Checking VisDrone dataset...")
print("Dataset will be downloaded automatically during first training.")

# VisDrone detection categories; a name's list position is its class index.
VISDRONE_CLASSES = [
    "pedestrian",
    "people",
    "bicycle",
    "car",
    "van",
    "truck",
    "tricycle",
    "awning-tricycle",
    "bus",
    "motor",
]

print(f"\nVisDrone Classes ({len(VISDRONE_CLASSES)}):")
for idx in range(len(VISDRONE_CLASSES)):
    print(f"  {idx}: {VISDRONE_CLASSES[idx]}")

## 3. Train YOLO26n on VisDrone

In [None]:
# Instantiate YOLO26n from the `yolo26n.pt` weights file; this object is
# the one trained in the next cell.
yolo26 = YOLO("yolo26n.pt")

# Print a heading, then the model summary. `info()` is the cell's last
# expression, so any value it returns is also displayed as cell output.
print("YOLO26n Model Info:")
yolo26.info()

In [None]:
# Train YOLO26n with the shared CONFIG; only the run name is model-specific.
banner = "=" * 60
print(banner)
print("Training YOLO26n on VisDrone")
print(banner)

# Shared settings come from CONFIG, run-specific flags are added on top.
yolo26_train_args = {
    key: CONFIG[key]
    for key in ("data", "epochs", "batch", "imgsz", "device", "project")
}
yolo26_train_args.update(
    name="yolo26n_visdrone",
    exist_ok=True,
    plots=True,
    save=True,
)
yolo26_results = yolo26.train(**yolo26_train_args)

In [None]:
# Path where the YOLO26n run (project / run name above) is expected to store
# its best checkpoint.
YOLO26_WEIGHTS = "/".join((CONFIG["project"], "yolo26n_visdrone", "weights", "best.pt"))
print("YOLO26n best weights: " + YOLO26_WEIGHTS)

## 4. Train YOLO11n on VisDrone

In [None]:
# Instantiate YOLO11n from the `yolo11n.pt` weights file; this object is
# the one trained in the next cell.
yolo11 = YOLO("yolo11n.pt")

# Print a heading, then the model summary. `info()` is the cell's last
# expression, so any value it returns is also displayed as cell output.
print("YOLO11n Model Info:")
yolo11.info()

In [None]:
# Train YOLO11n with the shared CONFIG; only the run name is model-specific.
banner = "=" * 60
print(banner)
print("Training YOLO11n on VisDrone")
print(banner)

# Shared settings come from CONFIG, run-specific flags are added on top.
yolo11_train_args = {
    key: CONFIG[key]
    for key in ("data", "epochs", "batch", "imgsz", "device", "project")
}
yolo11_train_args.update(
    name="yolo11n_visdrone",
    exist_ok=True,
    plots=True,
    save=True,
)
yolo11_results = yolo11.train(**yolo11_train_args)

In [None]:
# Path where the YOLO11n run (project / run name above) is expected to store
# its best checkpoint.
YOLO11_WEIGHTS = "/".join((CONFIG["project"], "yolo11n_visdrone", "weights", "best.pt"))
print("YOLO11n best weights: " + YOLO11_WEIGHTS)

## 5. Validation & Overall Metrics

In [None]:
# Re-open both checkpoints as new model objects used for every evaluation below.
yolo26_trained, yolo11_trained = YOLO(YOLO26_WEIGHTS), YOLO(YOLO11_WEIGHTS)

print("Models loaded successfully.")

In [None]:
# Both models are validated with exactly the same dataset, image size and device.
val_args = {key: CONFIG[key] for key in ("data", "imgsz", "device")}

print("\nValidating YOLO26n...")
yolo26_val = yolo26_trained.val(**val_args)

print("\nValidating YOLO11n...")
yolo11_val = yolo11_trained.val(**val_args)

In [None]:
# Table column -> attribute name on each validation result's `box` object.
metric_attrs = {
    "mAP50": "map50",
    "mAP50-95": "map",
    "Precision": "mp",
    "Recall": "mr",
}
val_runs = (yolo26_val, yolo11_val)  # same order as the "Model" column

metrics_data = {"Model": ["YOLO26n", "YOLO11n"]}
for column, attr in metric_attrs.items():
    metrics_data[column] = [getattr(run.box, attr) for run in val_runs]

metrics_df = pd.DataFrame(metrics_data)
rule = "=" * 60
print("\n" + rule)
print("Overall Metrics Comparison")
print(rule)
print(metrics_df.to_string(index=False))

## 6. COCO Evaluation - mAP by Object Size

This section uses pycocotools to break AP down by object size (small, medium, large).

In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from PIL import Image

# Folder names of the original VisDrone2019-DET archives, keyed by split.
_VISDRONE_SPLIT_DIRS = {
    "train": "VisDrone2019-DET-train",
    "val": "VisDrone2019-DET-val",
    "test": "VisDrone2019-DET-test-dev",
}


def _resolve_split_dirs(data_path, split):
    """Locate the image and label folders of one dataset split.

    Two layouts are probed, in this order:
      1. ``<root>/VisDrone2019-DET-<split>/{images,labels}``
      2. ``<root>/{images,labels}/<split>``
    NOTE(review): layout 2 is believed to be what newer Ultralytics
    ``VisDrone.yaml`` versions produce - confirm against the installed release.

    Args:
        data_path: Dataset root directory.
        split: One of "train", "val" or "test".

    Returns:
        Tuple ``(images_dir, labels_dir)`` of ``Path`` objects.

    Raises:
        KeyError: If ``split`` is not a known split name.
        FileNotFoundError: If no image folder exists for the split.
    """
    root = Path(data_path)
    legacy_dir = root / _VISDRONE_SPLIT_DIRS[split]
    candidates = [
        (legacy_dir / "images", legacy_dir / "labels"),
        (root / "images" / split, root / "labels" / split),
    ]
    for images_dir, labels_dir in candidates:
        if images_dir.is_dir():
            return images_dir, labels_dir
    tried = ", ".join(str(images_dir) for images_dir, _ in candidates)
    raise FileNotFoundError(f"No image folder found for split '{split}'. Tried: {tried}")


def _list_split_images(images_dir):
    """Return the sorted ``*.jpg`` and ``*.png`` files of a folder.

    The list position of an image is used as its COCO image id, so ground
    truth and predictions must both be built from this same ordering.
    """
    return sorted(list(images_dir.glob("*.jpg")) + list(images_dir.glob("*.png")))


def create_coco_annotations(data_path, split="val"):
    """Create COCO-format annotations from VisDrone labels.

    Label files are read as YOLO text rows ``class x_center y_center width height``
    with coordinates normalised to [0, 1]; they are scaled by the image size and
    converted to COCO ``[x_min, y_min, width, height]`` pixel boxes.

    Args:
        data_path: Dataset root directory.
        split: One of "train", "val" or "test".

    Returns:
        Dict with "images", "annotations" and "categories" lists.

    Raises:
        KeyError: If ``split`` is not a known split name.
        FileNotFoundError: If no image folder exists for the split.
    """
    images_dir, labels_dir = _resolve_split_dirs(data_path, split)

    coco_dict = {
        "images": [],
        "annotations": [],
        "categories": [
            {"id": i, "name": name}
            for i, name in enumerate(VISDRONE_CLASSES)
        ],
    }

    # Annotation ids start at 1: pycocotools stores the matched ground-truth id
    # per detection and treats 0 as "unmatched", so an annotation with id 0
    # would have its true positive counted as a false positive.
    annotation_id = 1

    for img_id, img_path in enumerate(_list_split_images(images_dir)):
        with Image.open(img_path) as img:
            width, height = img.size

        coco_dict["images"].append({
            "id": img_id,
            "file_name": img_path.name,
            "width": width,
            "height": height,
        })

        label_path = labels_dir / f"{img_path.stem}.txt"
        if not label_path.exists():
            continue  # image without objects: keep the image, add no boxes

        with open(label_path) as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 5:
                    continue  # blank or malformed row

                cls_id = int(parts[0])
                x_center = float(parts[1]) * width
                y_center = float(parts[2]) * height
                box_width = float(parts[3]) * width
                box_height = float(parts[4]) * height

                coco_dict["annotations"].append({
                    "id": annotation_id,
                    "image_id": img_id,
                    "category_id": cls_id,
                    "bbox": [
                        x_center - box_width / 2,
                        y_center - box_height / 2,
                        box_width,
                        box_height,
                    ],
                    "area": box_width * box_height,
                    "iscrowd": 0,
                })
                annotation_id += 1

    return coco_dict


def generate_predictions(model, data_path, split="val", conf=0.001, max_det=300):
    """Generate COCO-format predictions.

    Args:
        model: Object whose ``predict(source, ...)`` returns a list whose first
            item has a ``boxes`` attribute with ``xyxy``, ``conf`` and ``cls``.
        data_path: Dataset root directory.
        split: One of "train", "val" or "test".
        conf: Confidence threshold passed to ``predict``. It is kept very low
            because AP integrates over the whole precision-recall curve; a
            higher threshold discards the low-confidence tail and lowers AP.
        max_det: Maximum detections per image passed to ``predict``.

    Returns:
        List of dicts with "image_id", "category_id", "bbox" (COCO
        ``[x_min, y_min, width, height]``) and "score". Image ids are list
        positions in the sorted image listing, matching
        ``create_coco_annotations``.

    Raises:
        KeyError: If ``split`` is not a known split name.
        FileNotFoundError: If no image folder exists for the split.
    """
    images_dir, _ = _resolve_split_dirs(data_path, split)
    image_files = _list_split_images(images_dir)

    print(f"Generating predictions for {len(image_files)} images...")

    predictions = []
    for img_id, img_path in enumerate(image_files):
        results = model.predict(str(img_path), conf=conf, max_det=max_det, verbose=False)[0]

        if results.boxes is None or not len(results.boxes):
            continue

        boxes = results.boxes.xyxy.cpu().numpy()
        scores = results.boxes.conf.cpu().numpy()
        classes = results.boxes.cls.cpu().numpy()

        for (x_min, y_min, x_max, y_max), score, cls in zip(boxes, scores, classes):
            predictions.append({
                "image_id": img_id,
                "category_id": int(cls),
                # Corner format (xyxy) -> COCO origin-plus-size format (xywh).
                "bbox": [float(x_min), float(y_min), float(x_max - x_min), float(y_max - y_min)],
                "score": float(score),
            })

    return predictions

print("COCO evaluation functions defined.")

In [None]:
# Find the VisDrone dataset root.
# Candidates are checked in order; the first existing directory wins.
possible_paths = [
    Path("datasets/VisDrone"),
    Path.home() / "datasets" / "VisDrone",
    Path("/kaggle/input/visdrone"),
]

# Ultralytics stores auto-downloaded datasets under its configurable
# `datasets_dir` setting, which is not always ./datasets (for example when the
# notebook runs inside a git checkout). Check that location first.
try:
    from ultralytics import settings as _ultralytics_settings

    possible_paths.insert(0, Path(_ultralytics_settings["datasets_dir"]) / "VisDrone")
except (ImportError, KeyError, TypeError):
    # Setting unavailable in this Ultralytics version: keep the static candidates.
    pass

VISDRONE_PATH = next((p for p in possible_paths if p.exists()), None)

if VISDRONE_PATH:
    print(f"Found VisDrone dataset at: {VISDRONE_PATH}")
else:
    print("VisDrone dataset not found. Please check paths.")
    print("Searched: " + ", ".join(str(p) for p in possible_paths))

In [None]:
# Build the ground-truth COCO object (only when the dataset was located).
if VISDRONE_PATH:
    print("Creating ground truth annotations...")
    gt_dict = create_coco_annotations(VISDRONE_PATH, "val")

    # Populate pycocotools from the in-memory dict rather than a JSON file,
    # then build its lookup index.
    coco_gt = COCO()
    coco_gt.dataset = gt_dict
    coco_gt.createIndex()

    n_images, n_annotations = (len(gt_dict[key]) for key in ("images", "annotations"))
    print(f"Ground truth: {n_images} images, {n_annotations} annotations")

In [None]:
# COCO bbox evaluation of YOLO26n against the ground truth built above.
if VISDRONE_PATH:
    print("\n" + "=" * 60, "COCO Evaluation: YOLO26n", "=" * 60, sep="\n")

    yolo26_preds = generate_predictions(yolo26_trained, VISDRONE_PATH, "val")
    coco_dt_26 = coco_gt.loadRes(yolo26_preds)

    coco_eval_26 = COCOeval(coco_gt, coco_dt_26, "bbox")
    # The three stages must run in this order: match, aggregate, report.
    for stage in (coco_eval_26.evaluate, coco_eval_26.accumulate, coco_eval_26.summarize):
        stage()

    yolo26_stats = coco_eval_26.stats

In [None]:
# COCO bbox evaluation of YOLO11n against the ground truth built above.
if VISDRONE_PATH:
    print("\n" + "=" * 60, "COCO Evaluation: YOLO11n", "=" * 60, sep="\n")

    yolo11_preds = generate_predictions(yolo11_trained, VISDRONE_PATH, "val")
    coco_dt_11 = coco_gt.loadRes(yolo11_preds)

    coco_eval_11 = COCOeval(coco_gt, coco_dt_11, "bbox")
    # The three stages must run in this order: match, aggregate, report.
    for stage in (coco_eval_11.evaluate, coco_eval_11.accumulate, coco_eval_11.summarize):
        stage()

    yolo11_stats = coco_eval_11.stats

In [None]:
# Tabulate AP by object size for both models and print the differences.
if VISDRONE_PATH:
    # Table column -> position in the COCOeval `stats` vector.
    stat_index = {"AP_overall": 0, "AP_small": 3, "AP_medium": 4, "AP_large": 5}

    size_data = {"Model": ["YOLO26n", "YOLO11n"]}
    for column, position in stat_index.items():
        size_data[column] = [yolo26_stats[position], yolo11_stats[position]]

    size_df = pd.DataFrame(size_data)
    print("\n" + "=" * 60, "AP by Object Size (COCO Evaluation)", "=" * 60, sep="\n")
    print(size_df.to_string(index=False))

    # Signed difference per size bucket (positive means YOLO26n is better).
    print("\n" + "-" * 60)
    print("YOLO26n Improvement over YOLO11n:")
    is_yolo26 = size_df["Model"] == "YOLO26n"
    is_yolo11 = size_df["Model"] == "YOLO11n"
    for metric in ("AP_small", "AP_medium", "AP_large"):
        diff = size_df[is_yolo26][metric].values[0] - size_df[is_yolo11][metric].values[0]
        sign = "+" if diff > 0 else ""
        print(f"  {metric}: {sign}{diff:.4f}")

## 7. Speed Benchmark

Comparing inference speed on GPU and CPU.

In [None]:
def benchmark_speed(model, device, warmup=10, runs=100, imgsz=640):
    """Benchmark end-to-end ``model.predict`` latency on a random image.

    Each timed run covers one complete ``predict`` call on a single
    ``imgsz`` x ``imgsz`` uint8 image.

    Args:
        model: Object exposing ``predict(source, device=..., verbose=...)``.
        device: Device passed to ``predict`` ("cpu" or a GPU index).
        warmup: Number of untimed calls made before measuring.
        runs: Number of timed calls; must be at least 1.
        imgsz: Side length in pixels of the square dummy image.

    Returns:
        Dict with "mean_ms", "std_ms", "min_ms", "max_ms" (milliseconds per
        call) and "fps" (1000 / mean_ms).

    Raises:
        ValueError: If ``runs`` is smaller than 1.

    Side effects:
        Sets ``model.predictor`` to ``None`` when the attribute exists and is
        populated, forcing the predictor to be rebuilt for ``device``.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    # NOTE(review): Ultralytics appears to bind the device when a model's
    # predictor is first created and to ignore `device=` on later predict()
    # calls. A model already used on the GPU would then be "benchmarked on
    # CPU" while still running on the GPU. Dropping the cached predictor makes
    # the next call set itself up for the requested device - confirm against
    # the installed Ultralytics version.
    if getattr(model, "predictor", None) is not None:
        model.predictor = None

    # CUDA kernels run asynchronously; wait for them so timings are complete.
    needs_sync = device != "cpu" and torch.cuda.is_available()

    # Random image; the upper bound is exclusive, so 256 covers all of uint8.
    dummy_input = np.random.randint(0, 256, (imgsz, imgsz, 3), dtype=np.uint8)

    # Untimed calls absorb one-off costs (model setup, caches, lazy init).
    print(f"  Warmup ({warmup} runs)...")
    for _ in range(warmup):
        model.predict(dummy_input, device=device, verbose=False)

    if needs_sync:
        torch.cuda.synchronize()

    print(f"  Benchmarking ({runs} runs)...")
    times = []
    for _ in range(runs):
        start = time.perf_counter()
        model.predict(dummy_input, device=device, verbose=False)
        if needs_sync:
            torch.cuda.synchronize()
        times.append((time.perf_counter() - start) * 1000)  # seconds -> ms

    times = np.array(times)
    mean_ms = np.mean(times)
    return {
        "mean_ms": mean_ms,
        "std_ms": np.std(times),
        "min_ms": np.min(times),
        "max_ms": np.max(times),
        "fps": 1000 / mean_ms,
    }

print("Speed benchmark function defined.")

In [None]:
# GPU benchmark. `speed_results` gathers one row per (model, device) and is
# also appended to by the CPU benchmark cell.
speed_results = []

if torch.cuda.is_available():
    print("\n" + "=" * 60, "GPU Speed Benchmark", "=" * 60, sep="\n")

    print("\nYOLO26n on GPU:")
    yolo26_gpu = benchmark_speed(yolo26_trained, device=0)
    yolo26_gpu_row = {"Model": "YOLO26n", "Device": "GPU"}
    yolo26_gpu_row.update(yolo26_gpu)
    speed_results.append(yolo26_gpu_row)

    print("\nYOLO11n on GPU:")
    yolo11_gpu = benchmark_speed(yolo11_trained, device=0)
    yolo11_gpu_row = {"Model": "YOLO11n", "Device": "GPU"}
    yolo11_gpu_row.update(yolo11_gpu)
    speed_results.append(yolo11_gpu_row)

    # Ratio of mean latencies: above 1 means YOLO26n is the faster model.
    gpu_speedup = yolo11_gpu["mean_ms"] / yolo26_gpu["mean_ms"]
    print(f"\nGPU Speedup: YOLO26 is {gpu_speedup:.2f}x faster than YOLO11")

In [None]:
# CPU benchmark: rows are appended to the `speed_results` list created by the
# GPU benchmark cell.
print("\n" + "=" * 60, "CPU Speed Benchmark", "=" * 60, sep="\n")

print("\nYOLO26n on CPU:")
yolo26_cpu = benchmark_speed(yolo26_trained, device="cpu")
yolo26_cpu_row = {"Model": "YOLO26n", "Device": "CPU"}
yolo26_cpu_row.update(yolo26_cpu)
speed_results.append(yolo26_cpu_row)

print("\nYOLO11n on CPU:")
yolo11_cpu = benchmark_speed(yolo11_trained, device="cpu")
yolo11_cpu_row = {"Model": "YOLO11n", "Device": "CPU"}
yolo11_cpu_row.update(yolo11_cpu)
speed_results.append(yolo11_cpu_row)

# Ratio of mean latencies: above 1 means YOLO26n is the faster model.
cpu_speedup = yolo11_cpu["mean_ms"] / yolo26_cpu["mean_ms"]
percent_faster = (cpu_speedup - 1) * 100
print(f"\nCPU Speedup: YOLO26 is {cpu_speedup:.2f}x faster than YOLO11")
print("Claimed speedup: 1.43x (43% faster)")
print(f"Actual speedup: {cpu_speedup:.2f}x ({percent_faster:.1f}% faster)")

In [None]:
# Collect all benchmark rows into one table and print the key columns.
speed_df = pd.DataFrame(speed_results)
summary_columns = ["Model", "Device", "mean_ms", "std_ms", "fps"]
print("\n" + "=" * 60, "Speed Benchmark Results", "=" * 60, sep="\n")
print(speed_df[summary_columns].to_string(index=False))

## 8. Visualization

In [None]:
# Plot style and a fixed colour per model, reused by every panel.
plt.style.use('seaborn-v0_8-whitegrid')
colors = {'YOLO26n': '#2ecc71', 'YOLO11n': '#3498db'}
width = 0.35  # width of one bar inside a grouped pair


def _cell(frame, model, column):
    """Return the first `column` value among rows whose 'Model' equals `model`."""
    return frame[frame['Model'] == model][column].values[0]


def _grouped_bars(ax, frame, columns):
    """Draw side-by-side YOLO26n / YOLO11n bars for each column; return tick positions."""
    positions = np.arange(len(columns))
    for model, shift in (('YOLO26n', -width / 2), ('YOLO11n', width / 2)):
        heights = [_cell(frame, model, column) for column in columns]
        ax.bar(positions + shift, heights, width, label=model, color=colors[model])
    return positions


def _latency_bars(ax, frame):
    """Draw one mean-latency bar per model from an already device-filtered frame."""
    models = ['YOLO26n', 'YOLO11n']
    ax.bar(models,
           [_cell(frame, model, 'mean_ms') for model in models],
           color=[colors[model] for model in models])
    ax.set_ylabel('Inference Time (ms)')


# 2x2 grid: accuracy panels on the top row, speed panels on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax1, ax2), (ax3, ax4) = axes

# Panel 1: overall mAP
x = _grouped_bars(ax1, metrics_df, ['mAP50', 'mAP50-95'])
ax1.set_ylabel('mAP')
ax1.set_title('Overall mAP Comparison')
ax1.set_xticks(x)
ax1.set_xticklabels(['mAP50', 'mAP50-95'])
ax1.legend()
ax1.set_ylim(0, 1)

# Panel 2: AP by object size (needs the COCO evaluation results)
if VISDRONE_PATH:
    x = _grouped_bars(ax2, size_df, ['AP_small', 'AP_medium', 'AP_large'])
    ax2.set_ylabel('AP')
    ax2.set_title('AP by Object Size (COCO Evaluation)')
    ax2.set_xticks(x)
    ax2.set_xticklabels(['Small (<32px)', 'Medium (32-96px)', 'Large (>96px)'])
    ax2.legend()
else:
    ax2.text(0.5, 0.5, 'COCO evaluation not available', ha='center', va='center')
    ax2.set_title('AP by Object Size')

# Panel 3: CPU latency
cpu_data = speed_df[speed_df['Device'] == 'CPU']
_latency_bars(ax3, cpu_data)
ax3.set_title(f'CPU Inference Speed\n(YOLO26 is {cpu_speedup:.2f}x faster)')

# Panel 4: GPU latency (only when a GPU benchmark was run)
if torch.cuda.is_available():
    gpu_data = speed_df[speed_df['Device'] == 'GPU']
    _latency_bars(ax4, gpu_data)
    ax4.set_title(f'GPU Inference Speed\n(YOLO26 is {gpu_speedup:.2f}x faster)')
else:
    ax4.text(0.5, 0.5, 'GPU not available', ha='center', va='center')
    ax4.set_title('GPU Inference Speed')

plt.suptitle('YOLO26 vs YOLO11 Benchmark on VisDrone', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('benchmark_results.png', dpi=150, bbox_inches='tight')
plt.show()

print("\nVisualization saved to: benchmark_results.png")

## 9. Summary & Key Findings

In [None]:
# Final report: accuracy tables, speed table and verdicts on the two claims.
heavy_rule = "=" * 70
light_rule = "-" * 50

print(heavy_rule)
print("YOLO26 vs YOLO11 Benchmark Summary - VisDrone Dataset")
print(heavy_rule)

print("\n1. OVERALL ACCURACY:")
print(light_rule)
print(metrics_df.to_string(index=False))

if VISDRONE_PATH:
    print("\n2. ACCURACY BY OBJECT SIZE (COCO):")
    print(light_rule)
    print(size_df.to_string(index=False))

print("\n3. INFERENCE SPEED:")
print(light_rule)
print(speed_df[["Model", "Device", "mean_ms", "fps"]].to_string(index=False))

# Speed verdict: full claim at >= 1.43x, partial credit at >= 1.2x.
if cpu_speedup >= 1.43:
    speed_verdict = "CONFIRMED"
elif cpu_speedup >= 1.2:
    speed_verdict = "PARTIALLY CONFIRMED"
else:
    speed_verdict = "NOT CONFIRMED"

print("\n4. KEY FINDINGS:")
print(light_rule)
print(f"   - CPU Speedup: YOLO26 is {cpu_speedup:.2f}x faster ({(cpu_speedup-1)*100:.1f}%)")
print("   - Ultralytics Claim: 43% faster (1.43x) on CPU")
print(f"   - Claim Verification: {speed_verdict}")

if VISDRONE_PATH:
    # Small-object AP gain of YOLO26n over YOLO11n (positive = improvement).
    small_ap_26 = size_df[size_df['Model'] == 'YOLO26n']['AP_small'].values[0]
    small_ap_11 = size_df[size_df['Model'] == 'YOLO11n']['AP_small'].values[0]
    small_diff = small_ap_26 - small_ap_11
    small_sign = '+' if small_diff > 0 else ''
    small_verdict = 'CONFIRMED' if small_diff > 0 else 'NOT CONFIRMED'
    print(f"   - Small Object AP Improvement: {small_sign}{small_diff:.4f}")
    print(f"   - Small Object Claim: {small_verdict}")

print("\n" + heavy_rule)

## 10. Upload Best Model to HuggingFace

In [None]:
# Read the HuggingFace token from the environment so no credential is ever
# stored in the notebook. When HF_TOKEN is unset, None is passed and
# huggingface_hub falls back to a locally saved login, if one exists.
HF_TOKEN = os.environ.get("HF_TOKEN") or None
HF_REPO = "raimbekovm/yolo26-visdrone"
# NOTE(review): model weights are normally hosted in a "model" repository.
# "space" is kept unchanged from the original code - confirm it is intended.
HF_REPO_TYPE = "space"

# Upload model
from huggingface_hub import HfApi

api = HfApi()

if not os.path.isfile(YOLO26_WEIGHTS):
    # Nothing to upload: training has not produced the checkpoint yet.
    print(f"Upload failed: weights file not found at {YOLO26_WEIGHTS}")
    print("You can manually upload the model to HuggingFace.")
else:
    try:
        # Upload YOLO26 model
        api.upload_file(
            path_or_fileobj=YOLO26_WEIGHTS,
            path_in_repo="yolo26n_visdrone.pt",
            repo_id=HF_REPO,
            repo_type=HF_REPO_TYPE,
            token=HF_TOKEN,
        )
        print(f"Successfully uploaded YOLO26n model to {HF_REPO}")
    except Exception as e:
        # Best effort: an upload problem must not abort the rest of the notebook.
        print(f"Upload failed: {e}")
        print("You can manually upload the model to HuggingFace.")

In [None]:
# Persist every result table to CSV; the size table exists only when the
# COCO evaluation ran.
csv_outputs = [
    (metrics_df, "metrics_comparison.csv"),
    (speed_df, "speed_benchmark.csv"),
]
if VISDRONE_PATH:
    csv_outputs.append((size_df, "coco_evaluation.csv"))

for frame, filename in csv_outputs:
    frame.to_csv(filename, index=False)

print("Results saved to CSV files.")
print("\nBenchmark complete!")