# YOLO26 vs YOLO11 Benchmark on VisDrone

**Goal:** Verify YOLO26 claims on drone imagery with small objects:
1. **43% faster CPU inference**
2. **Better small object detection** (ProgLoss + STAL)
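3. **NMS-free end-to-end inference** (note: the setup cell below patches `YOLO.__init__` to set `end2end = False` on every loaded model, so this claim is not exercised by the benchmark as written)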

**Dataset:** VisDrone (~90% small objects <32px)

**Author:** Murat Raimbekov | [GitHub](https://github.com/raimbekovm/yolo26-visdrone)

In [None]:
# Install latest ultralytics from GitHub main branch (fixes end2end training bug)
!pip install -q git+https://github.com/ultralytics/ultralytics.git pycocotools pandas matplotlib seaborn
import ultralytics
print(f"Ultralytics version: {ultralytics.__version__}")

In [None]:
import os
import time
import json
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from ultralytics import YOLO

# Report the PyTorch build and whether it can see a CUDA device.
print(f"PyTorch: {torch.__version__}")
cuda_available = torch.cuda.is_available()
print(f"CUDA: {cuda_available}")
if cuda_available:
    # Name of the CUDA device at index 0.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# CRITICAL FIX: Monkey-patch ultralytics to disable end2end on all model loads
# This is necessary because YOLO26's end2end head causes:
# "RuntimeError: Inference tensors do not track version counter"
_original_yolo_init = YOLO.__init__


def _patched_yolo_init(self, *args, **kwargs):
    """Run the original ``YOLO.__init__``, then switch ``end2end`` off on the last layer.

    All positional and keyword arguments are forwarded unchanged to the
    original initializer. Afterwards, if ``self.model.model`` exists and its
    last element has a truthy ``end2end`` attribute, that attribute is set to
    ``False`` and a ``[PATCH]`` line is printed.

    The patch is best-effort: a failure while inspecting or modifying the
    head never prevents the model from loading, but it is reported instead of
    being swallowed silently, so a run that still has ``end2end`` enabled is
    visible in the notebook output.
    """
    _original_yolo_init(self, *args, **kwargs)
    # Disable end2end after model is loaded
    try:
        if hasattr(self, 'model') and hasattr(self.model, 'model'):
            head = self.model.model[-1]
            if hasattr(head, 'end2end') and head.end2end:
                head.end2end = False
                print(f"[PATCH] Disabled end2end for {type(head).__name__}")
    except Exception as e:
        # Deliberately broad: this wrapper runs on every YOLO() construction and
        # must not turn a patching problem into a load failure. Report it so the
        # user knows the workaround was NOT applied to this model.
        print(f"[PATCH] WARNING: could not disable end2end ({type(e).__name__}: {e})")


YOLO.__init__ = _patched_yolo_init
print("[PATCH] YOLO.__init__ patched to disable end2end")

## Configuration

In [None]:
# Settings shared by the training and validation cells of both models.
CONFIG = dict(
    epochs=50,  # Reduced for Kaggle time limits
    batch=16,
    imgsz=640,
    data='VisDrone.yaml',
    device=0,
    workers=4,
    project='runs',
)

# Speed benchmark config (unpacked into benchmark_speed for the GPU runs)
SPEED_CONFIG = dict(warmup=50, runs=200)

print("Config:", CONFIG)

## 1. Train YOLO26n

In [None]:
print("="*60)
print("Training YOLO26n on VisDrone")
print("="*60)

# Start from the pretrained nano checkpoint and print a model summary.
yolo26 = YOLO('yolo26n.pt')
yolo26.info()

# Run name is defined once so the training call and the weights path below
# cannot drift apart.
YOLO26_RUN_NAME = 'yolo26n'

yolo26_results = yolo26.train(
    data=CONFIG['data'],
    epochs=CONFIG['epochs'],
    batch=CONFIG['batch'],
    imgsz=CONFIG['imgsz'],
    device=CONFIG['device'],
    workers=CONFIG['workers'],
    project=CONFIG['project'],
    name=YOLO26_RUN_NAME,
    exist_ok=True,  # reuse the run directory instead of creating yolo26n2, yolo26n3, ...
    plots=True,
    verbose=True,
)

# Derived from CONFIG['project'] and the run name rather than hard-coded, so
# changing either keeps this path in sync. Assumes the
# <project>/<name>/weights/best.pt layout that the original literal relied on.
YOLO26_WEIGHTS = str(Path(CONFIG['project']) / YOLO26_RUN_NAME / 'weights' / 'best.pt')
print(f"\nYOLO26n weights: {YOLO26_WEIGHTS}")

## 2. Train YOLO11n

In [None]:
print("="*60)
print("Training YOLO11n on VisDrone")
print("="*60)

# Start from the pretrained nano checkpoint and print a model summary.
yolo11 = YOLO('yolo11n.pt')
yolo11.info()

# Run name is defined once so the training call and the weights path below
# cannot drift apart.
YOLO11_RUN_NAME = 'yolo11n'

yolo11_results = yolo11.train(
    data=CONFIG['data'],
    epochs=CONFIG['epochs'],
    batch=CONFIG['batch'],
    imgsz=CONFIG['imgsz'],
    device=CONFIG['device'],
    workers=CONFIG['workers'],
    project=CONFIG['project'],
    name=YOLO11_RUN_NAME,
    exist_ok=True,  # reuse the run directory instead of creating yolo11n2, yolo11n3, ...
    plots=True,
    verbose=True,
)

# Derived from CONFIG['project'] and the run name rather than hard-coded, so
# changing either keeps this path in sync. Assumes the
# <project>/<name>/weights/best.pt layout that the original literal relied on.
YOLO11_WEIGHTS = str(Path(CONFIG['project']) / YOLO11_RUN_NAME / 'weights' / 'best.pt')
print(f"\nYOLO11n weights: {YOLO11_WEIGHTS}")

## 3. Validation - Overall Metrics

In [None]:
# Reload both models from their best checkpoints (the YOLO.__init__ patch
# disables end2end on load).
print("Loading trained models...")
yolo26_trained, yolo11_trained = YOLO(YOLO26_WEIGHTS), YOLO(YOLO11_WEIGHTS)

# Both models are validated with exactly the same settings.
val_kwargs = dict(data=CONFIG['data'], imgsz=CONFIG['imgsz'], device=CONFIG['device'])

print("\nValidating YOLO26n...")
val26 = yolo26_trained.val(**val_kwargs)

print("\nValidating YOLO11n...")
val11 = yolo11_trained.val(**val_kwargs)

# One row per model; every column is read from the validator's box metrics.
val_runs = (val26, val11)
metrics = pd.DataFrame({
    'Model': ['YOLO26n', 'YOLO11n'],
    'mAP50': [run.box.map50 for run in val_runs],
    'mAP50-95': [run.box.map for run in val_runs],
    'Precision': [run.box.mp for run in val_runs],
    'Recall': [run.box.mr for run in val_runs],
})

banner = "="*60
print("\n" + banner)
print("OVERALL METRICS")
print(banner)
print(metrics.to_string(index=False))

## 4. Speed Benchmark (CPU & GPU)

In [None]:
def benchmark_speed(model, device, warmup=50, runs=200, imgsz=640):
    """Time repeated ``model.predict`` calls on one synthetic image.

    Args:
        model: Object exposing ``predict(source, device=..., verbose=...)``.
        device: Device forwarded to ``predict``. CUDA synchronisation is
            skipped when this is the string ``'cpu'`` or CUDA is unavailable.
        warmup: Number of untimed calls made before measuring.
        runs: Number of timed calls; must be at least 1.
        imgsz: Side length in pixels of the square 3-channel dummy image.

    Returns:
        dict with ``'mean'``, ``'std'``, ``'min'`` and ``'max'`` of the
        per-call wall-clock latency in milliseconds.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        # Fail up front instead of after a NumPy empty-mean warning.
        raise ValueError(f"runs must be >= 1, got {runs}")

    # Fixed seed: every call (and therefore every model being compared) is
    # timed on the exact same image. The upper bound 256 is exclusive, so the
    # full uint8 range 0..255 is used.
    rng = np.random.default_rng(0)
    dummy = rng.integers(0, 256, size=(imgsz, imgsz, 3), dtype=np.uint8)

    # Decide once whether pending CUDA work must be flushed around timings.
    sync_cuda = device != 'cpu' and torch.cuda.is_available()

    # Warmup
    for _ in range(warmup):
        model.predict(dummy, device=device, verbose=False)

    if sync_cuda:
        torch.cuda.synchronize()

    # Benchmark
    times = []
    for _ in range(runs):
        t0 = time.perf_counter()
        model.predict(dummy, device=device, verbose=False)
        if sync_cuda:
            # Wait for queued GPU work so it is included in this sample.
            torch.cuda.synchronize()
        times.append((time.perf_counter() - t0) * 1000)

    return {
        'mean': np.mean(times),
        'std': np.std(times),
        'min': np.min(times),
        'max': np.max(times),
    }

In [None]:
# Collects one row per (model, device) pair; the CPU cell appends to it too.
speed_results = []

# GPU Benchmark
banner = "="*60
print("\n" + banner)
print("GPU SPEED BENCHMARK")
print(banner)

# Time both models on device 0 with identical warmup/run counts.
gpu_stats = {}
for label, trained in (('YOLO26n', yolo26_trained), ('YOLO11n', yolo11_trained)):
    print(f"\n{label} GPU...")
    stats = benchmark_speed(trained, device=0, **SPEED_CONFIG)
    speed_results.append({'Model': label, 'Device': 'GPU', **stats})
    print(f"  Mean: {stats['mean']:.2f} ms")
    gpu_stats[label] = stats

s26_gpu, s11_gpu = gpu_stats['YOLO26n'], gpu_stats['YOLO11n']

# Ratio of mean latencies: a value above 1 means YOLO26n took less time per call.
gpu_speedup = s11_gpu['mean'] / s26_gpu['mean']
direction = 'faster' if gpu_speedup > 1 else 'slower'
print(f"\nGPU Speedup: YOLO26 is {gpu_speedup:.2f}x {direction}")

In [None]:
# CPU Benchmark (THE MAIN CLAIM: 43% faster)
print("\n" + "="*60)
print("CPU SPEED BENCHMARK (Main YOLO26 Claim: 43% faster)")
print("="*60)

# Use fresh model instances for the CPU runs. yolo26_trained / yolo11_trained
# have already been validated and benchmarked on device 0, and an Ultralytics
# model object keeps the predictor created by its first predict() call.
# Reusing them therefore risks timing the GPU-resident predictor again instead
# of the CPU. NOTE(review): confirm the predictor-reuse behaviour against the
# installed ultralytics version; loading fresh copies is correct either way.
yolo26_cpu = YOLO(YOLO26_WEIGHTS)
yolo11_cpu = YOLO(YOLO11_WEIGHTS)

print("\nYOLO26n CPU... (this takes a while)")
s26_cpu = benchmark_speed(yolo26_cpu, device='cpu', warmup=10, runs=50)
speed_results.append({'Model': 'YOLO26n', 'Device': 'CPU', **s26_cpu})
print(f"  Mean: {s26_cpu['mean']:.2f} ms")

print("\nYOLO11n CPU...")
s11_cpu = benchmark_speed(yolo11_cpu, device='cpu', warmup=10, runs=50)
speed_results.append({'Model': 'YOLO11n', 'Device': 'CPU', **s11_cpu})
print(f"  Mean: {s11_cpu['mean']:.2f} ms")

# cpu_speedup: ratio of mean latencies (above 1 means YOLO26n is quicker).
# cpu_improvement: percentage reduction in mean latency relative to YOLO11n.
cpu_speedup = s11_cpu['mean'] / s26_cpu['mean']
cpu_improvement = (1 - s26_cpu['mean'] / s11_cpu['mean']) * 100

# Verdict thresholds: >= 40% counts as verified, >= 20% as partially verified.
if cpu_improvement >= 40:
    cpu_verdict = 'VERIFIED ✓'
elif cpu_improvement >= 20:
    cpu_verdict = 'PARTIALLY VERIFIED'
else:
    cpu_verdict = 'NOT VERIFIED'

print("\n" + "="*60)
print("CPU SPEEDUP RESULTS")
print("="*60)
print(f"YOLO26n: {s26_cpu['mean']:.2f} ms")
print(f"YOLO11n: {s11_cpu['mean']:.2f} ms")
print(f"Speedup: {cpu_speedup:.2f}x ({cpu_improvement:.1f}% faster)")
print("\nClaimed: 43% faster")
print(f"Actual:  {cpu_improvement:.1f}% faster")
print(f"Claim {cpu_verdict}")

In [None]:
# Turn the accumulated benchmark rows into a table and show it in full.
speed_df = pd.DataFrame(speed_results)
print("\nSpeed Benchmark Results:", speed_df.to_string(index=False), sep="\n")

## 5. COCO Evaluation - mAP by Object Size

Testing ProgLoss + STAL claim for small objects

In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from PIL import Image

# Category names; a label's class index is its position in this list.
CLASSES = [
    'pedestrian', 'people', 'bicycle', 'car', 'van',
    'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor',
]

# Find dataset path: the first candidate that exists on disk wins.
possible = [Path('datasets/VisDrone'), Path.home()/'datasets'/'VisDrone']
DATA_PATH = None
for candidate in possible:
    if candidate.exists():
        DATA_PATH = candidate
        break

if DATA_PATH is None:
    print("Dataset path not found, skipping COCO eval")
else:
    print(f"Dataset: {DATA_PATH}")

In [None]:
def create_coco_gt(data_path):
    """Create a COCO-style ground-truth dict from the VisDrone validation labels.

    Args:
        data_path: ``pathlib.Path`` of the dataset root. Images are read from
            ``VisDrone2019-DET-val/images/*.jpg`` and labels from the matching
            ``VisDrone2019-DET-val/labels/<stem>.txt`` files beneath it.

    Returns:
        dict with ``'images'``, ``'annotations'`` and ``'categories'`` lists.
        Image ids are the 0-based position of each image in the sorted file
        listing. Category ids are indices into ``CLASSES``. Annotation ids
        start at 1.

    Each label line is read as ``class xc yc w h``, with the four box values
    treated as fractions of the image width/height; they are converted to
    pixel ``[x_min, y_min, width, height]`` boxes. Lines with fewer than five
    fields are skipped, and images without a label file get no annotations.
    """
    split_dir = data_path / 'VisDrone2019-DET-val'
    images_dir = split_dir / 'images'
    labels_dir = split_dir / 'labels'

    coco = {'images': [], 'annotations': [], 'categories': [
        {'id': i, 'name': n} for i, n in enumerate(CLASSES)
    ]}

    # Start at 1, not 0: COCOeval records the matched ground-truth annotation
    # id as its "this detection was matched" marker and treats 0 as "no match",
    # so an annotation with id 0 could never yield a true positive.
    ann_id = 1
    for img_id, img_path in enumerate(sorted(images_dir.glob('*.jpg'))):
        # Only the image size is needed; close the file right away.
        with Image.open(img_path) as im:
            w, h = im.size

        coco['images'].append({'id': img_id, 'file_name': img_path.name, 'width': w, 'height': h})

        label_path = labels_dir / f"{img_path.stem}.txt"
        if not label_path.exists():
            continue

        # Context manager so the label file handle is closed deterministically.
        with open(label_path, encoding='utf-8') as label_file:
            for line in label_file:
                parts = line.split()
                if len(parts) < 5:
                    continue
                cls = int(parts[0])
                xc, yc, bw, bh = map(float, parts[1:5])
                # Scale the fractional centre/size values to pixels.
                xc, yc, bw, bh = xc*w, yc*h, bw*w, bh*h
                coco['annotations'].append({
                    'id': ann_id, 'image_id': img_id, 'category_id': cls,
                    'bbox': [xc-bw/2, yc-bh/2, bw, bh], 'area': bw*bh, 'iscrowd': 0
                })
                ann_id += 1
    return coco

def get_predictions(model, data_path, conf=0.001, **predict_kwargs):
    """Run ``model`` over the VisDrone validation images and return COCO-format detections.

    Args:
        model: Object exposing ``predict(source, **kwargs)`` that returns a
            sequence whose first element has a ``boxes`` attribute (or
            ``None``) providing ``xyxy``, ``conf`` and ``cls``.
        data_path: ``pathlib.Path`` of the dataset root; images are read from
            ``VisDrone2019-DET-val/images/*.jpg`` beneath it.
        conf: Confidence threshold forwarded to ``predict``. It defaults to a
            very low value because AP integrates over the whole
            precision-recall curve; relying on the predictor's own default
            threshold (0.25 according to the Ultralytics documentation) drops
            the low-confidence detections and biases AP downwards.
        **predict_kwargs: Extra keyword arguments forwarded to ``predict``
            (for example ``imgsz`` or ``device``). ``verbose`` is always
            ``False`` and must not be supplied here.

    Returns:
        list of dicts with ``'image_id'``, ``'category_id'``, ``'bbox'``
        (``[x_min, y_min, width, height]`` in pixels) and ``'score'``. Image
        ids are the 0-based position of each image in the sorted file listing,
        the same scheme ``create_coco_gt`` uses.
    """
    images_dir = data_path / 'VisDrone2019-DET-val' / 'images'
    preds = []

    for img_id, img_path in enumerate(sorted(images_dir.glob('*.jpg'))):
        results = model.predict(str(img_path), conf=conf, verbose=False, **predict_kwargs)[0]
        boxes = results.boxes
        if boxes is None:
            continue

        # Move each field to host memory once per image.
        corners = boxes.xyxy.cpu().numpy()
        scores = boxes.conf.cpu().numpy()
        labels = boxes.cls.cpu().numpy()

        for (x1, y1, x2, y2), score, label in zip(corners, scores, labels):
            preds.append({
                'image_id': img_id, 'category_id': int(label),
                # COCO boxes are [x_min, y_min, width, height], not corner pairs.
                'bbox': [float(x1), float(y1), float(x2-x1), float(y2-y1)],
                'score': float(score)
            })
    return preds

In [None]:
# One row per model with overall AP and AP split by object size.
size_results = []

if DATA_PATH:
    print("Creating COCO ground truth...")
    gt_dict = create_coco_gt(DATA_PATH)
    # Build the COCO index from the in-memory dict rather than a JSON file.
    coco_gt = COCO()
    coco_gt.dataset = gt_dict
    coco_gt.createIndex()
    print(f"GT: {len(gt_dict['images'])} images, {len(gt_dict['annotations'])} annotations")

    # Same evaluation pipeline for both models, YOLO26n first.
    evaluators = {}
    for label, trained in (('YOLO26n', yolo26_trained), ('YOLO11n', yolo11_trained)):
        print(f"\nEvaluating {label}...")
        detections = coco_gt.loadRes(get_predictions(trained, DATA_PATH))
        evaluator = COCOeval(coco_gt, detections, 'bbox')
        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()
        evaluators[label] = evaluator

        # Entries 0, 3, 4 and 5 of the summary vector are stored as the
        # overall, small, medium and large AP respectively.
        stats = evaluator.stats
        size_results.append({
            'Model': label,
            'AP': stats[0],
            'AP_small': stats[3],
            'AP_medium': stats[4],
            'AP_large': stats[5],
        })

    eval26, eval11 = evaluators['YOLO26n'], evaluators['YOLO11n']

In [None]:
if size_results:
    # Tabulate the per-size AP rows collected by the COCO evaluation cell.
    size_df = pd.DataFrame(size_results)

    banner = "="*60
    print("\n" + banner)
    print("mAP BY OBJECT SIZE (ProgLoss + STAL Test)")
    print(banner)
    print(size_df.to_string(index=False))

    # Calculate improvement: YOLO26n small-object AP minus YOLO11n small-object AP.
    small_26 = size_df.loc[size_df['Model'] == 'YOLO26n', 'AP_small'].iloc[0]
    small_11 = size_df.loc[size_df['Model'] == 'YOLO11n', 'AP_small'].iloc[0]
    small_diff = small_26 - small_11

    # Any strictly positive difference counts as verified.
    sign = '+' if small_diff > 0 else ''
    verdict = 'VERIFIED ✓' if small_diff > 0 else 'NOT VERIFIED'
    print(f"\nSmall Object AP Improvement: {sign}{small_diff:.4f}")
    print(f"ProgLoss + STAL Claim: {verdict}")

## 6. Visualization

In [None]:
# 2x2 summary figure: overall mAP, CPU latency, AP by object size, GPU latency.
plt.style.use('seaborn-v0_8-whitegrid')
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

colors = {'YOLO26n': '#2ecc71', 'YOLO11n': '#3498db'}

# The speedups are latency ratios (YOLO11n / YOLO26n), so a value of 1 or less
# means YOLO26n was NOT faster. Pick the wording from the measurement instead
# of always claiming "faster", matching the GPU benchmark printout.
cpu_direction = 'faster' if cpu_speedup > 1 else 'slower'
gpu_direction = 'faster' if gpu_speedup > 1 else 'slower'

# 1. mAP Comparison
ax = axes[0, 0]
x = np.arange(2)
w = 0.35
ax.bar(x - w/2, metrics['mAP50'], w, label='mAP50', color='#3498db')
ax.bar(x + w/2, metrics['mAP50-95'], w, label='mAP50-95', color='#2ecc71')
ax.set_xticks(x)
ax.set_xticklabels(metrics['Model'])
ax.set_ylabel('mAP')
ax.set_title('Overall mAP Comparison')
ax.legend()
ax.set_ylim(0, 0.6)

# 2. Speed Comparison
ax = axes[0, 1]
cpu_data = speed_df[speed_df['Device'] == 'CPU']
bars = ax.bar(cpu_data['Model'], cpu_data['mean'],
              color=[colors[m] for m in cpu_data['Model']], edgecolor='black')
ax.set_ylabel('Inference Time (ms)')
ax.set_title(f'CPU Speed (YOLO26 is {cpu_speedup:.1f}x {cpu_direction})')
# Write the mean latency just above each bar.
for bar, val in zip(bars, cpu_data['mean']):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
            f'{val:.1f}ms', ha='center', fontweight='bold')

# 3. mAP by Size
ax = axes[1, 0]
if size_results:
    size_columns = ['AP_small', 'AP_medium', 'AP_large']
    x = np.arange(3)
    w = 0.35
    ax.bar(x - w/2, [size_df[size_df['Model']=='YOLO26n'][c].values[0] for c in size_columns],
           w, label='YOLO26n', color=colors['YOLO26n'])
    ax.bar(x + w/2, [size_df[size_df['Model']=='YOLO11n'][c].values[0] for c in size_columns],
           w, label='YOLO11n', color=colors['YOLO11n'])
    ax.set_xticks(x)
    ax.set_xticklabels(['Small\n(<32px)', 'Medium\n(32-96px)', 'Large\n(>96px)'])
    ax.set_ylabel('AP')
    ax.set_title('AP by Object Size (COCO Eval)')
    ax.legend()
else:
    # COCO evaluation was skipped (no dataset path), so leave a placeholder.
    ax.text(0.5, 0.5, 'COCO eval not available', ha='center', va='center')

# 4. GPU Speed
ax = axes[1, 1]
gpu_data = speed_df[speed_df['Device'] == 'GPU']
bars = ax.bar(gpu_data['Model'], gpu_data['mean'],
              color=[colors[m] for m in gpu_data['Model']], edgecolor='black')
ax.set_ylabel('Inference Time (ms)')
ax.set_title(f'GPU Speed (YOLO26 is {gpu_speedup:.1f}x {gpu_direction})')
# Write the mean latency just above each bar.
for bar, val in zip(bars, gpu_data['mean']):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
            f'{val:.1f}ms', ha='center', fontweight='bold')

plt.suptitle('YOLO26 vs YOLO11 on VisDrone', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('benchmark_results.png', dpi=150)
plt.show()

## 7. Summary

In [None]:
# Final text report assembled from the tables and figures computed above.
heavy_rule = "="*70
light_rule = "-"*50

print("\n" + heavy_rule)
print("YOLO26 vs YOLO11 BENCHMARK SUMMARY - VisDrone Dataset")
print(heavy_rule)

print("\n1. OVERALL ACCURACY")
print(light_rule)
print(metrics.to_string(index=False))

print("\n2. SPEED BENCHMARK")
print(light_rule)
speed_summary = speed_df[['Model', 'Device', 'mean']]
print(speed_summary.to_string(index=False))

if size_results:
    print("\n3. mAP BY OBJECT SIZE")
    print(light_rule)
    print(size_df.to_string(index=False))

print("\n" + heavy_rule)
print("KEY FINDINGS")
print(heavy_rule)

# Same thresholds as the CPU benchmark cell: >= 40% verified, >= 20% partial.
if cpu_improvement >= 40:
    cpu_status = 'VERIFIED ✓'
elif cpu_improvement >= 20:
    cpu_status = 'PARTIALLY VERIFIED'
else:
    cpu_status = 'NOT VERIFIED'

print(f"\n✓ CPU Speedup: {cpu_speedup:.2f}x ({cpu_improvement:.1f}% faster)")
print("  Claimed: 43% faster")
print(f"  Status: {cpu_status}")

print(f"\n✓ GPU Speedup: {gpu_speedup:.2f}x")

if size_results:
    # Small-object AP per model, read back from the size table.
    ap_small_26 = size_df[size_df['Model']=='YOLO26n']['AP_small'].values[0]
    ap_small_11 = size_df[size_df['Model']=='YOLO11n']['AP_small'].values[0]
    diff_sign = '+' if small_diff > 0 else ''
    small_status = 'VERIFIED ✓' if small_diff > 0 else 'NOT VERIFIED'

    print("\n✓ Small Object Detection:")
    print(f"  YOLO26 AP_small: {ap_small_26:.4f}")
    print(f"  YOLO11 AP_small: {ap_small_11:.4f}")
    print(f"  Improvement: {diff_sign}{small_diff:.4f}")
    print(f"  ProgLoss+STAL: {small_status}")

print("\n" + heavy_rule)

In [None]:
# Save results: each table goes to its own CSV without the index column.
exports = [(metrics, 'metrics.csv'), (speed_df, 'speed_benchmark.csv')]
if size_results:
    # The size table only exists when the COCO evaluation actually ran.
    exports.append((size_df, 'size_metrics.csv'))

for frame, filename in exports:
    frame.to_csv(filename, index=False)

print("Results saved to CSV files.")
print("\nBenchmark complete!")