# 🦁 African Wildlife Detection: YOLOv5 + YOLOv8 + Ensemble Learning
**Dataset:** [Ultralytics African Wildlife](https://docs.ultralytics.com/datasets/detect/african-wildlife/)  
**Classes:** Buffalo, Elephant, Rhino, Zebra  
**Goal:** Train YOLOv5 & YOLOv8, then combine them via Weighted Box Fusion ensemble.

## 1. Install Dependencies

In [None]:
# Notebook shell commands: quietly (-q) install the third-party packages used
# by the cells below.
# NOTE(review): `supervision` is installed here but never imported in the
# visible cells — confirm it is still needed.
!pip install ultralytics ensemble-boxes supervision pycocotools matplotlib seaborn -q

# Clone the YOLOv5 repo and install its requirements (only needed when
# training through the repo's own train.py).
!git clone https://github.com/ultralytics/yolov5.git -q
!pip install -r yolov5/requirements.txt -q

print('‚úÖ All dependencies installed.')

## 2. Setup & Imports

In [None]:
import os
import sys
import json
import glob
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
import cv2
from pathlib import Path
from PIL import Image

import torch
from ultralytics import YOLO
from ensemble_boxes import weighted_boxes_fusion

# Choose the compute device once, then report the runtime environment.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'PyTorch : {torch.__version__}')
print(f'CUDA    : {device == "cuda"}')
print(f'Device  : {device}')

## 3. Download the African Wildlife Dataset

In [None]:
# Ultralytics fetches the dataset on the first .train() / .val() call that
# references it; this cell forces that to happen up front.
from ultralytics.utils.downloads import download
from ultralytics import settings

# The dataset lives inside the datasets directory from the Ultralytics settings.
DATASET_ROOT = Path(settings['datasets_dir'])
DATASET_PATH = DATASET_ROOT.joinpath('african-wildlife')
print(f'Dataset will be stored at: {DATASET_PATH}')

# A throwaway validation run with a small model is enough to trigger the download.
if DATASET_PATH.exists():
    print('‚úÖ Dataset already present.')
else:
    tmp = YOLO('yolov8n.pt')
    tmp.val(data='african-wildlife.yaml', imgsz=640, verbose=False)
    print('‚úÖ Dataset downloaded.')

## 4. Explore the Dataset

In [None]:
# Class id -> display name, and class id -> colour used for boxes and bars.
CLASSES = {0: 'Buffalo', 1: 'Elephant', 2: 'Rhino', 3: 'Zebra'}
COLORS  = {0: '#F5A623', 1: '#3B82F6', 2: '#A855F7', 3: '#22C55E'}

splits = ['train', 'val', 'test']
stats = {}

# For every split, count the images and the labelled boxes per class.
for split_name in splits:
    image_folder = DATASET_PATH / 'images' / split_name
    label_folder = DATASET_PATH / 'labels' / split_name
    n_images = len(list(image_folder.glob('*.jpg'))) + len(list(image_folder.glob('*.png')))
    per_class = dict.fromkeys(CLASSES.values(), 0)
    for label_file in label_folder.glob('*.txt'):
        for row in label_file.read_text().strip().splitlines():
            class_id = int(row.split()[0])   # first token of a label row is the class id
            per_class[CLASSES[class_id]] += 1
    stats[split_name] = {'images': n_images, **per_class}

# One row per split, one column per count.
df = pd.DataFrame(stats).T
print(df)

# Two side-by-side bar charts on a dark background.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0D1117')
split_ax, class_ax = axes
for panel in axes:
    panel.set_facecolor('#161B22')

# Left panel: images per split.
split_ax.bar(df.index, df['images'],
             color=['#F5A623', '#3B82F6', '#22C55E'], edgecolor='none', width=0.5)

# Right panel: class distribution of the train split.
class_data = df.loc['train', list(CLASSES.values())]
class_ax.bar(class_data.index, class_data.values,
             color=[COLORS[idx] for idx in range(4)], edgecolor='none', width=0.5)

# Shared cosmetics for both panels.
for panel, heading in zip(axes, ('Images per Split', 'Class Distribution (Train)')):
    panel.set_title(heading, color='white', fontsize=13, fontweight='bold')
    panel.tick_params(colors='white')
    for edge in panel.spines.values():
        edge.set_edgecolor('#30363D')
split_ax.yaxis.label.set_color('white')

plt.tight_layout()
plt.savefig('dataset_stats.png', dpi=150, bbox_inches='tight', facecolor='#0D1117')
plt.show()

In [None]:
# Visualise a few training images with ground-truth boxes
def plot_sample_images(n=6):
    """Show up to *n* training images with their ground-truth boxes drawn on top.

    The first *n* ``*.jpg`` files (sorted by name) of the train split under
    ``DATASET_PATH`` are displayed in a three-column grid whose number of rows
    follows the number of images found. Boxes are read from the label file with
    the same stem, one ``class cx cy w h`` row per box, with coordinates
    normalised to the image size. The figure is shown and also written to
    ``sample_images.png``.

    Args:
        n: Maximum number of images to display (default 6, i.e. a 2x3 grid).
    """
    img_dir = DATASET_PATH / 'images' / 'train'
    lbl_dir = DATASET_PATH / 'labels' / 'train'
    img_files = sorted(img_dir.glob('*.jpg'))[:n]

    # Size the grid from the images actually found instead of a fixed 2x3,
    # so that n > 6 is not silently truncated.
    n_cols = 3
    n_rows = max(1, -(-len(img_files) // n_cols))   # ceiling division, at least one row
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows), squeeze=False)
    fig.patch.set_facecolor('#0D1117')
    axes = axes.flatten()

    # Style every cell up front so unused cells stay blank instead of showing
    # an empty axis with ticks.
    for ax in axes:
        ax.set_facecolor('#161B22')
        ax.axis('off')

    for ax, img_path in zip(axes, img_files):
        img = np.array(Image.open(img_path).convert('RGB'))
        h, w = img.shape[:2]
        lbl_path = lbl_dir / (img_path.stem + '.txt')

        ax.imshow(img)

        if not lbl_path.exists():
            continue

        for line in lbl_path.read_text().strip().splitlines():
            parts = line.split()
            if not parts:
                continue   # tolerate blank rows inside a label file
            cls, cx, cy, bw, bh = map(float, parts)
            cls = int(cls)
            # Convert normalised centre/size to the pixel top-left corner.
            x1 = (cx - bw/2) * w
            y1 = (cy - bh/2) * h
            rect = patches.Rectangle(
                (x1, y1), bw*w, bh*h,
                linewidth=2, edgecolor=COLORS[cls], facecolor='none'
            )
            ax.add_patch(rect)
            ax.text(x1, y1-5, CLASSES[cls],
                    color='white', fontsize=8, fontweight='bold',
                    bbox=dict(facecolor=COLORS[cls], alpha=0.8, pad=2, edgecolor='none'))

    plt.suptitle('Sample Training Images', color='white', fontsize=15, fontweight='bold')
    plt.tight_layout()
    plt.savefig('sample_images.png', dpi=150, bbox_inches='tight', facecolor='#0D1117')
    plt.show()

plot_sample_images()

## 5. Train YOLOv5m

In [None]:
# Train YOLOv5m through the Ultralytics API (the "u" checkpoint) instead of
# shelling out to yolov5/train.py. Two reasons:
#   * weights written by the standalone yolov5 repo cannot be loaded with
#     `YOLO(...)`, which the evaluation and ensemble cells rely on;
#   * yolov5/train.py does not ship an `african-wildlife.yaml` dataset config,
#     and its exit status was never checked, so a failed run still printed
#     "training complete".
import subprocess   # no longer used here; kept in case other cells rely on it

model_v5_train = YOLO('yolov5mu.pt')      # YOLOv5m, pretrained on COCO

print('Starting YOLOv5m training...')
result = model_v5_train.train(
    data     = 'african-wildlife.yaml',
    epochs   = 100,
    imgsz    = 640,
    batch    = 16,
    cache    = True,
    project  = 'runs/yolov5',
    name     = 'wildlife',
    exist_ok = True,
    device   = '0' if torch.cuda.is_available() else 'cpu',
)

V5_WEIGHTS = Path('runs/yolov5/wildlife/weights/best.pt')
# Fail here, with a clear message, rather than later when the weights are loaded.
if not V5_WEIGHTS.exists():
    raise FileNotFoundError(f'YOLOv5m training did not produce {V5_WEIGHTS}')
print(f'\n‚úÖ YOLOv5m training complete. Best weights: {V5_WEIGHTS}')

## 6. Train YOLOv8s

In [None]:
# Fine-tune the COCO-pretrained YOLOv8s checkpoint on the wildlife dataset.
model_v8 = YOLO('yolov8s.pt')

# Training configuration, collected in one place and expanded into .train().
v8_train_args = dict(
    data='african-wildlife.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    lr0=0.01,
    weight_decay=0.0005,
    augment=True,
    mosaic=1.0,
    project='runs/yolov8',
    name='wildlife',
    exist_ok=True,
    device=device,
    verbose=True,
)
results_v8 = model_v8.train(**v8_train_args)

# Location of the best checkpoint for the project/name chosen above.
V8_WEIGHTS = Path('runs/yolov8/wildlife/weights/best.pt')
print(f'\n‚úÖ YOLOv8s training complete. Best weights: {V8_WEIGHTS}')

## 7. Plot Training Curves

In [None]:
def plot_training_curves(csv_path, title, color):
    """Plot mAP@0.5, precision and recall per epoch from a training results CSV.

    Column headers are stripped of surrounding whitespace before lookup. Each
    metric is searched under the Ultralytics column name first
    (e.g. ``metrics/mAP50(B)``) and then under the name written by the
    standalone YOLOv5 repo (e.g. ``metrics/mAP_0.5``), so CSVs from either
    trainer produce curves. A panel whose metric is missing is labelled as
    such instead of being left blank and unstyled.

    The figure is shown and saved as ``<title with spaces as underscores>_curves.png``.

    Args:
        csv_path: Path to the ``results.csv`` written during training.
        title:    Figure title; also used to build the output file name.
        color:    Line colour for all three curves.
    """
    df = pd.read_csv(csv_path)
    df.columns = df.columns.str.strip()

    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    fig.patch.set_facecolor('#0D1117')
    for ax in axes: ax.set_facecolor('#161B22')

    # (accepted column names in priority order, panel title)
    metrics = [
        (('metrics/mAP50(B)',     'metrics/mAP_0.5'),   'mAP@0.5'),
        (('metrics/precision(B)', 'metrics/precision'), 'Precision'),
        (('metrics/recall(B)',    'metrics/recall'),    'Recall'),
    ]

    # Fall back to the row index when the CSV has no explicit epoch column.
    epochs = df['epoch'] if 'epoch' in df.columns else df.index

    for ax, (candidates, label) in zip(axes, metrics):
        col = next((c for c in candidates if c in df.columns), None)

        ax.set_title(label, color='white', fontsize=11, fontweight='bold')
        ax.set_xlabel('Epoch', color='#6B7280', fontsize=9)
        ax.tick_params(colors='white')
        ax.grid(alpha=0.1, color='white')
        for spine in ax.spines.values(): spine.set_edgecolor('#30363D')

        if col is None:
            # Make the gap visible rather than silently drawing nothing.
            ax.text(0.5, 0.5, 'metric not found in CSV', transform=ax.transAxes,
                    ha='center', va='center', color='#6B7280', fontsize=9)
            continue

        ax.plot(epochs, df[col], color=color, linewidth=2)

    plt.suptitle(title, color='white', fontsize=13, fontweight='bold')
    plt.tight_layout()
    plt.savefig(f'{title.replace(" ","_")}_curves.png', dpi=150,
                bbox_inches='tight', facecolor='#0D1117')
    plt.show()

# YOLOv5 results CSV
plot_training_curves('runs/yolov5/wildlife/results.csv',  'YOLOv5m Training', '#F5A623')

# YOLOv8 results CSV
plot_training_curves('runs/yolov8/wildlife/results.csv',  'YOLOv8s Training', '#3B82F6')

## 8. Evaluate Individual Models on Test Set

In [None]:
# Reload the best checkpoint of each model for evaluation.
model_v5, model_v8 = YOLO(str(V5_WEIGHTS)), YOLO(str(V8_WEIGHTS))

# Both models are validated on the test split with identical settings.
test_val_args = dict(data='african-wildlife.yaml', split='test', verbose=True)

print('=== YOLOv5m ‚Äî Test Set ===')
metrics_v5 = model_v5.val(**test_val_args)

print('\n=== YOLOv8s ‚Äî Test Set ===')
metrics_v8 = model_v8.val(**test_val_args)

def extract_metrics(m, label):
    """Summarise a validation result as one table row.

    Args:
        m:     Object exposing ``box.map50``, ``box.map``, ``box.mp`` and ``box.mr``.
        label: Value stored under the ``'Model'`` key.

    Returns:
        Dict with the model label followed by the four metrics, each rounded
        to four decimals.
    """
    box = m.box
    row = {'Model': label}
    named_values = (
        ('mAP@0.5',      box.map50),
        ('mAP@0.5:0.95', box.map),
        ('Precision',    box.mp),
        ('Recall',       box.mr),
    )
    for column, value in named_values:
        row[column] = round(value, 4)
    return row

# One summary row per individually evaluated model.
results_df = pd.DataFrame([
    extract_metrics(model_metrics, model_name)
    for model_metrics, model_name in ((metrics_v5, 'YOLOv5m'), (metrics_v8, 'YOLOv8s'))
])
print('\n', results_df.to_string(index=False))

## 9. Ensemble: Weighted Box Fusion (WBF)
We combine predictions from both models at inference time using **Weighted Box Fusion**.
- Boxes from both models are clustered by IoU overlap
- Each cluster is fused into one box by weighted averaging coordinates and scores
- Model agreement boosts confidence — divergence dampens it

In [None]:
def predict_single(model, image_path, conf=0.25):
    """Run one model on one image and return detections in WBF input format.

    Args:
        model:      YOLO model object exposing ``predict``.
        image_path: Path to the image (converted to ``str`` for the model).
        conf:       Confidence threshold passed to ``model.predict``.

    Returns:
        Tuple ``(boxes, scores, labels)``:
          * boxes  -- (N, 4) array of xyxy coordinates normalised to [0, 1]
          * scores -- (N,) array of confidences
          * labels -- (N,) int array of class ids
        All three are empty when the model detects nothing.
    """
    r = model.predict(str(image_path), conf=conf, verbose=False)[0]

    if len(r.boxes) == 0:
        return np.zeros((0,4)), np.zeros(0), np.zeros(0, dtype=int)

    # `xyxyn` is already normalised by the original image size, so the image
    # does not have to be decoded a second time just to read its width/height.
    boxes  = r.boxes.xyxyn.cpu().numpy()
    scores = r.boxes.conf.cpu().numpy()
    labels = r.boxes.cls.cpu().numpy().astype(int)

    # WBF requires coordinates inside [0, 1]; clip any numerical overshoot.
    boxes = np.clip(boxes, 0, 1)
    return boxes, scores, labels


def ensemble_predict(image_path,
                     models,
                     weights=None,
                     iou_thr=0.55,
                     skip_thr=0.30,
                     conf=0.25):
    """
    Fuse the detections of several YOLO models on one image with WBF.

    Each model is run through ``predict_single`` and the per-model results
    are handed to ``weighted_boxes_fusion``.

    Args:
        image_path : path to image
        models     : list of YOLO model objects
        weights    : per-model weights (None = every model weighted 1.0)
        iou_thr    : IoU threshold passed to WBF as ``iou_thr``
        skip_thr   : score threshold passed to WBF as ``skip_box_thr``
        conf       : detection confidence threshold applied by each model

    Returns:
        fused_boxes  : (N,4) normalised xyxy
        fused_scores : (N,)
        fused_labels : (N,) int
    """
    model_weights = [1.0] * len(models) if weights is None else weights

    # Run every model, then split the results into the three parallel lists
    # (plain Python lists) that WBF expects.
    per_model = [predict_single(m, image_path, conf=conf) for m in models]
    boxes_list  = [boxes.tolist()  for boxes, _, _ in per_model]
    scores_list = [scores.tolist() for _, scores, _ in per_model]
    labels_list = [labels.tolist() for _, _, labels in per_model]

    fused = weighted_boxes_fusion(
        boxes_list, scores_list, labels_list,
        weights=model_weights,
        iou_thr=iou_thr,
        skip_box_thr=skip_thr,
    )
    fused_boxes, fused_scores, fused_labels = fused
    return fused_boxes, fused_scores, fused_labels.astype(int)


print('‚úÖ Ensemble functions ready.')

## 10. Evaluate Ensemble on Test Set (COCO-style mAP)

In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import itertools

# ── Convert YOLO labels to COCO-format GT ──────────────────────────────────
def build_coco_gt(img_dir, lbl_dir):
    """Build a COCO-style ground-truth dict from YOLO label files.

    Every ``*.jpg`` in *img_dir* (sorted by name) becomes one image entry whose
    id is its position in that order. Boxes are read from the label file with
    the same stem in *lbl_dir*; each row is ``class cx cy w h`` normalised to
    the image size and is converted to pixel ``[x, y, width, height]``.

    Args:
        img_dir: Directory holding the images.
        lbl_dir: Directory holding the YOLO ``.txt`` label files.

    Returns:
        Dict with ``'images'``, ``'annotations'`` and ``'categories'`` lists;
        categories are taken from the module-level ``CLASSES`` mapping.
    """
    img_dir = Path(img_dir)
    lbl_dir = Path(lbl_dir)
    coco = {'images': [], 'annotations': [], 'categories': [
        {'id': i, 'name': v} for i, v in CLASSES.items()
    ]}
    # Annotation ids must start at 1: COCOeval records the matched GT id per
    # detection and treats 0 as "no match", so a box with id 0 would always
    # be scored as a false positive.
    ann_id = 1
    for img_id, img_path in enumerate(sorted(img_dir.glob('*.jpg'))):
        # Only the size is needed; close the file handle right away.
        with Image.open(img_path) as img:
            w, h = img.size
        coco['images'].append({'id': img_id, 'file_name': img_path.name,
                               'width': w, 'height': h})
        lbl_path = lbl_dir / (img_path.stem + '.txt')
        if not lbl_path.exists():
            continue
        for line in lbl_path.read_text().strip().splitlines():
            parts = line.split()
            if not parts:
                continue   # tolerate blank rows inside a label file
            cls, cx, cy, bw, bh = map(float, parts)
            # Normalised centre/size -> pixel top-left corner.
            x1 = (cx - bw/2) * w
            y1 = (cy - bh/2) * h
            coco['annotations'].append({
                'id': ann_id, 'image_id': img_id,
                'category_id': int(cls),
                'bbox': [x1, y1, bw*w, bh*h],
                'area': bw*w * bh*h,
                'iscrowd': 0,
            })
            ann_id += 1
    return coco


# ── Run ensemble on all test images & collect predictions ──────────────────
def evaluate_ensemble(models, weights, iou_thr=0.55, skip_thr=0.30, conf=0.25):
    """Score the WBF ensemble on the test split with COCOeval.

    Ground truth is built with ``build_coco_gt`` from the test images/labels
    under ``DATASET_PATH``; every test image is run through
    ``ensemble_predict`` and the fused boxes are converted to pixel
    ``[x, y, w, h]`` COCO detections.

    Args:
        models:   List of YOLO model objects.
        weights:  Per-model WBF weights.
        iou_thr:  Forwarded to ``ensemble_predict``.
        skip_thr: Forwarded to ``ensemble_predict``.
        conf:     Per-model confidence threshold forwarded to
                  ``ensemble_predict``.
                  NOTE(review): mAP is sensitive to this cut-off; compare with
                  metrics from other evaluators with care.

    Returns:
        The 12-element ``COCOeval.stats`` array (index 0 = mAP@0.5:0.95,
        index 1 = mAP@0.5). An all-zero array is returned when the ensemble
        produced no detections at all.
    """
    test_img_dir = DATASET_PATH / 'images' / 'test'
    test_lbl_dir = DATASET_PATH / 'labels' / 'test'

    coco_gt_dict = build_coco_gt(test_img_dir, test_lbl_dir)

    # Index the ground truth in memory instead of round-tripping through a
    # JSON file in a hard-coded /tmp location.
    coco_gt = COCO()
    coco_gt.dataset = coco_gt_dict
    coco_gt.createIndex()

    dt_list = []
    # The GT image entries already carry file name, id and size, so the
    # images do not need to be listed or opened again here.
    for im in coco_gt_dict['images']:
        W, H = im['width'], im['height']

        fboxes, fscores, flabels = ensemble_predict(
            test_img_dir / im['file_name'], models, weights=weights,
            iou_thr=iou_thr, skip_thr=skip_thr, conf=conf)

        for box, score, label in zip(fboxes, fscores, flabels):
            x1, y1, x2, y2 = box
            dt_list.append({
                'image_id':    im['id'],
                'category_id': int(label),
                'bbox': [x1*W, y1*H, (x2-x1)*W, (y2-y1)*H],
                'score': float(score),
            })

    if not dt_list:
        # COCO.loadRes() inspects the first result and fails on an empty list.
        return np.zeros(12)

    coco_dt = coco_gt.loadRes(dt_list)
    ev = COCOeval(coco_gt, coco_dt, iouType='bbox')
    ev.evaluate(); ev.accumulate(); ev.summarize()
    return ev.stats  # [mAP@0.5:0.95, mAP@0.5, ...]


# Evaluate the two-model ensemble on the test split, weighting YOLOv8s
# slightly higher than YOLOv5m.
print('Running ensemble evaluation (this may take a few minutes)...')
ensemble_stats = evaluate_ensemble(
    models=[model_v5, model_v8],
    weights=[0.45, 0.55],
    iou_thr=0.55,
    skip_thr=0.30,
)

# stats[0] is mAP@0.5:0.95 and stats[1] is mAP@0.5.
ens_map50_95, ens_map50 = ensemble_stats[0], ensemble_stats[1]
print(f'\nEnsemble mAP@0.5      : {ens_map50:.4f}')
print(f'Ensemble mAP@0.5:0.95 : {ens_map50_95:.4f}')

## 11. Compare All Three: Single Models vs Ensemble

In [None]:
# Comparison table: one row per single model, plus one row for the ensemble.
comparison = pd.DataFrame(
    [
        {
            'Model':        model_name,
            'mAP@0.5':      model_metrics.box.map50,
            'mAP@0.5:0.95': model_metrics.box.map,
            'Precision':    model_metrics.box.mp,
            'Recall':       model_metrics.box.mr,
        }
        for model_name, model_metrics in (('YOLOv5m', metrics_v5), ('YOLOv8s', metrics_v8))
    ]
    + [
        {
            'Model':        'Ensemble (WBF)',
            'mAP@0.5':      ensemble_stats[1],
            'mAP@0.5:0.95': ensemble_stats[0],
            # The ensemble evaluation only yields mAP values, so P/R stay empty.
            'Precision':    None,
            'Recall':       None,
        }
    ]
)

print(comparison.to_string(index=False, float_format='{:.4f}'.format))

In [None]:
# Bar chart comparison of the two mAP metrics across the three rows of
# `comparison`, followed by the signed mAP@0.5 difference of the ensemble.
metrics_to_plot = ['mAP@0.5', 'mAP@0.5:0.95']
bar_colors = ['#F5A623', '#3B82F6', '#22C55E']
models_labels = comparison['Model'].tolist()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0D1117')

for ax, metric in zip(axes, metrics_to_plot):
    # Only plot models that actually have a value for this metric.
    vals = comparison[metric].dropna().tolist()
    lbls = comparison.loc[comparison[metric].notna(), 'Model'].tolist()
    bars = ax.bar(lbls, vals, color=bar_colors[:len(vals)],
                  edgecolor='none', width=0.5)

    # Value label just above each bar.
    for bar, val in zip(bars, vals):
        ax.text(bar.get_x() + bar.get_width()/2,
                bar.get_height() + 0.005,
                f'{val:.3f}', ha='center', va='bottom',
                color='white', fontsize=10, fontweight='bold')

    ax.set_title(metric, color='white', fontsize=12, fontweight='bold')
    ax.set_facecolor('#161B22')
    ax.tick_params(colors='white')
    ax.set_ylim(0, 1.05)
    for spine in ax.spines.values(): spine.set_edgecolor('#30363D')
    ax.yaxis.label.set_color('white')

plt.suptitle('Model Comparison: YOLOv5m vs YOLOv8s vs Ensemble',
             color='white', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.savefig('model_comparison.png', dpi=150,
            bbox_inches='tight', facecolor='#0D1117')
plt.show()

# Print improvements
# NOTE(review): the single-model values come from model.val() while the
# ensemble value comes from the COCOeval-based evaluation; confirm both use
# comparable settings before reading small differences as real gains.
v5_map  = metrics_v5.box.map50
v8_map  = metrics_v8.box.map50
ens_map = ensemble_stats[1]
print(f'\nüìä mAP@0.5 Improvements:')
# `:+` prints the real sign; a hard-coded '+' rendered regressions as '+-x.xx%'.
print(f'  Ensemble vs YOLOv5m : {(ens_map - v5_map)*100:+.2f}%')
print(f'  Ensemble vs YOLOv8s : {(ens_map - v8_map)*100:+.2f}%')

## 12. Visualise: Single Model vs Ensemble Predictions

In [None]:
def draw_boxes(ax, image_np, boxes_norm, scores, labels, title, color_map):
    """Show an image on *ax* and overlay labelled detection boxes.

    Args:
        ax:         Matplotlib axis to draw on.
        image_np:   Image array; its first two dimensions give height and width.
        boxes_norm: Iterable of xyxy boxes normalised to [0, 1].
        scores:     Confidence per box, printed next to the class name.
        labels:     Class id per box, looked up in ``CLASSES`` and *color_map*.
        title:      Axis title.
        color_map:  Mapping from class id to colour; ids missing from it fall
                    back to white for the outline and ``'#333'`` for the tag.
    """
    img_h, img_w = image_np.shape[:2]

    ax.imshow(image_np)
    ax.set_title(title, color='white', fontsize=10, fontweight='bold')
    ax.axis('off')
    ax.set_facecolor('#161B22')

    for box, score, label in zip(boxes_norm, scores, labels):
        class_id = int(label)

        # Scale the normalised corners back to pixel coordinates.
        left, top = box[0] * img_w, box[1] * img_h
        right, bottom = box[2] * img_w, box[3] * img_h

        outline = patches.Rectangle(
            (left, top), right - left, bottom - top,
            linewidth=2,
            edgecolor=color_map.get(class_id, 'white'),
            facecolor='none',
        )
        ax.add_patch(outline)

        # Class name and score, placed just above the box.
        tag_style = dict(facecolor=color_map.get(class_id, '#333'),
                         alpha=0.85, pad=1.5, edgecolor='none')
        ax.text(left, top - 6,
                f"{CLASSES.get(class_id, '?')} {score:.2f}",
                color='white', fontsize=7, fontweight='bold',
                bbox=tag_style)


def compare_predictions(image_path, n_cols=3):
    """Plot YOLOv5, YOLOv8 and ensemble detections for one image side by side.

    The figure is shown and saved as ``comparison_<image stem>.png``.

    Args:
        image_path: Path to the image to run all three predictors on.
        n_cols:     Accepted for compatibility; the layout always has three panels.
    """
    img_np = np.array(Image.open(image_path).convert('RGB'))

    # Same order as the panels: each single model first, then the fusion.
    v5_out = predict_single(model_v5, image_path)
    v8_out = predict_single(model_v8, image_path)
    ens_out = ensemble_predict(image_path, [model_v5, model_v8],
                               weights=[0.45, 0.55])

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    fig.patch.set_facecolor('#0D1117')

    panels = (('YOLOv5m', v5_out), ('YOLOv8s', v8_out), ('Ensemble WBF', ens_out))
    for ax, (panel_name, (boxes, scores, labels)) in zip(axes, panels):
        draw_boxes(ax, img_np, boxes, scores, labels,
                   f'{panel_name}  ({len(boxes)} detections)', COLORS)

    image_file = Path(image_path)
    plt.suptitle(f'Predictions on {image_file.name}',
                 color='white', fontsize=12, fontweight='bold')
    plt.tight_layout()
    plt.savefig(f'comparison_{image_file.stem}.png', dpi=150,
                bbox_inches='tight', facecolor='#0D1117')
    plt.show()


# Compare predictions on the first three test images (sorted by file name).
test_imgs = sorted(DATASET_PATH.joinpath('images', 'test').glob('*.jpg'))[:3]
for img_path in test_imgs:
    compare_predictions(img_path)

## 13. Per-Class AP with Ensemble

In [None]:
# Class names in id order, plus the per-class AP@0.5 arrays that the
# single-model validation results expose directly.
class_names = [*CLASSES.values()]

ap_v5, ap_v8 = metrics_v5.box.ap50, metrics_v8.box.ap50

# Per-class AP for the ensemble is computed here by running COCOeval once per
# category on the fused detections, restricted to the single IoU threshold 0.5.
def per_class_ensemble_ap(models, weights, iou_thr=0.55):
    """Return the ensemble's AP@0.5 for each category on the test split.

    Ground truth is built with ``build_coco_gt`` from the test images/labels
    under ``DATASET_PATH``; detections come from ``ensemble_predict``.

    Args:
        models:  List of YOLO model objects.
        weights: Per-model WBF weights.
        iou_thr: WBF clustering threshold forwarded to ``ensemble_predict``
                 (not the evaluation IoU, which is fixed at 0.5).

    Returns:
        List with one AP@0.5 value per category, ordered by ascending category
        id. All values are 0.0 when the ensemble produced no detections.
        NOTE(review): COCOeval reports -1 for a category it cannot score —
        confirm how the caller should display that.
    """
    test_img_dir = DATASET_PATH / 'images' / 'test'
    test_lbl_dir = DATASET_PATH / 'labels' / 'test'
    coco_gt_dict = build_coco_gt(test_img_dir, test_lbl_dir)

    # Index the ground truth in memory instead of writing a JSON file to a
    # hard-coded /tmp path.
    coco_gt = COCO()
    coco_gt.dataset = coco_gt_dict
    coco_gt.createIndex()

    dt_list = []
    # The GT image entries already hold file name, id and size.
    for im in coco_gt_dict['images']:
        W, H = im['width'], im['height']
        fboxes, fscores, flabels = ensemble_predict(
            test_img_dir / im['file_name'], models, weights=weights, iou_thr=iou_thr)
        for box, score, label in zip(fboxes, fscores, flabels):
            x1, y1, x2, y2 = box
            dt_list.append({'image_id': im['id'], 'category_id': int(label),
                            'bbox': [x1*W, y1*H, (x2-x1)*W, (y2-y1)*H],
                            'score': float(score)})

    # Take the categories from the ground truth rather than assuming four.
    cat_ids = sorted(coco_gt.getCatIds())

    if not dt_list:
        # COCO.loadRes() inspects the first result and fails on an empty list.
        return [0.0] * len(cat_ids)

    coco_dt = coco_gt.loadRes(dt_list)
    aps = []
    for cat_id in cat_ids:
        ev = COCOeval(coco_gt, coco_dt, iouType='bbox')
        ev.params.catIds = [cat_id]
        ev.params.iouThrs = np.array([0.5])
        ev.evaluate(); ev.accumulate()
        ev.summarize()
        # With iouThrs restricted to 0.5, the first summary entry (the
        # "all IoU thresholds" AP) is exactly AP@0.5.
        aps.append(ev.stats[0])
    return aps

print('Computing per-class AP for ensemble...')
ap_ensemble = per_class_ensemble_ap([model_v5, model_v8], weights=[0.45, 0.55])

# Grouped bar chart: one group per class, one bar per model.
x = np.arange(len(class_names))
width = 0.25

fig, ax = plt.subplots(figsize=(12, 5))
fig.patch.set_facecolor('#0D1117')
ax.set_facecolor('#161B22')

# (horizontal offset within the group, values, legend label, colour)
bar_series = (
    (-width, ap_v5,       'YOLOv5m',        '#F5A623'),
    (0,      ap_v8,       'YOLOv8s',        '#3B82F6'),
    (width,  ap_ensemble, 'Ensemble (WBF)', '#22C55E'),
)
r1, r2, r3 = (
    ax.bar(x + offset, values, width, label=legend, color=colour, edgecolor='none')
    for offset, values, legend, colour in bar_series
)

# Print each bar's height just above it.
for bar in [*r1, *r2, *r3]:
    bar_top = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2,
            bar_top + 0.01,
            f'{bar_top:.2f}',
            ha='center', va='bottom', color='white', fontsize=8)

ax.set_xticks(x)
ax.set_xticklabels(class_names, color='white', fontsize=11)
ax.set_ylabel('AP@0.5', color='white')
ax.set_title('Per-Class AP@0.5: YOLOv5m vs YOLOv8s vs Ensemble',
             color='white', fontsize=12, fontweight='bold')
ax.tick_params(colors='white')
ax.set_ylim(0, 1.05)
ax.legend(facecolor='#1C2128', labelcolor='white', fontsize=10)
for edge in ax.spines.values():
    edge.set_edgecolor('#30363D')

plt.tight_layout()
plt.savefig('per_class_ap.png', dpi=150, bbox_inches='tight', facecolor='#0D1117')
plt.show()

## 14. WBF Weight Sensitivity Analysis

In [None]:
# Sweep different weight ratios for v5 vs v8 to find the optimal split
weight_ratios = [(1.0, 0.0), (0.7, 0.3), (0.55, 0.45),
                 (0.5, 0.5), (0.45, 0.55), (0.3, 0.7), (0.0, 1.0)]
# Two decimals are needed: with one decimal 0.55/0.45 is displayed as
# "0.6/0.5", which misreports the ratio being tested.
labels_wr = [f'v5={a:.2f}\nv8={b:.2f}' for a, b in weight_ratios]
maps_wr = []

# NOTE(review): the two end points reuse the single-model validation metrics,
# while the mixed ratios are scored through evaluate_ensemble(); confirm both
# paths use comparable settings before reading small differences as real gains.
for w5, w8 in weight_ratios:
    if w5 == 1.0:
        maps_wr.append(metrics_v5.box.map50)
    elif w8 == 1.0:
        maps_wr.append(metrics_v8.box.map50)
    else:
        # Dedicated name so the dataset `stats` dict from the exploration
        # cell is not overwritten.
        sweep_stats = evaluate_ensemble([model_v5, model_v8], weights=[w5, w8])
        maps_wr.append(sweep_stats[1])
    print(f'  w=({w5:.2f},{w8:.2f}) ‚Üí mAP@0.5 = {maps_wr[-1]:.4f}')

fig, ax = plt.subplots(figsize=(11, 4))
fig.patch.set_facecolor('#0D1117')
ax.set_facecolor('#161B22')

bars = ax.bar(labels_wr, maps_wr, color='#3B82F6', edgecolor='none', width=0.5)

# Highlight the best-scoring ratio in green.
best_idx = np.argmax(maps_wr)
bars[best_idx].set_color('#22C55E')

for bar, val in zip(bars, maps_wr):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.003,
            f'{val:.3f}', ha='center', va='bottom',
            color='white', fontsize=8, fontweight='bold')

# Reference lines for each model on its own.
ax.axhline(metrics_v5.box.map50, color='#F5A623', linestyle='--', linewidth=1.2, label='YOLOv5m alone')
ax.axhline(metrics_v8.box.map50, color='#3B82F6', linestyle='--', linewidth=1.2, label='YOLOv8s alone')

ax.set_ylabel('mAP@0.5', color='white')
ax.set_title('WBF Weight Sensitivity Analysis', color='white', fontsize=12, fontweight='bold')
ax.tick_params(colors='white')
ax.set_ylim(min(maps_wr) - 0.02, 1.0)
ax.legend(facecolor='#1C2128', labelcolor='white')
for spine in ax.spines.values(): spine.set_edgecolor('#30363D')

plt.tight_layout()
plt.savefig('wbf_weight_sensitivity.png', dpi=150, bbox_inches='tight', facecolor='#0D1117')
plt.show()

print(f'\nüèÜ Best weight ratio: v5={weight_ratios[best_idx][0]:.2f}, v8={weight_ratios[best_idx][1]:.2f}')
print(f'   Best mAP@0.5 = {maps_wr[best_idx]:.4f}')

## 15. Final Summary

In [None]:
# Final table: both single models plus the ensemble (the ensemble evaluation
# yields no precision/recall, so those cells stay empty).
summary = pd.DataFrame([
    {'Model': 'YOLOv5m',        'mAP@0.5': metrics_v5.box.map50,  'mAP@0.5:0.95': metrics_v5.box.map,  'Precision': metrics_v5.box.mp, 'Recall': metrics_v5.box.mr},
    {'Model': 'YOLOv8s',        'mAP@0.5': metrics_v8.box.map50,  'mAP@0.5:0.95': metrics_v8.box.map,  'Precision': metrics_v8.box.mp, 'Recall': metrics_v8.box.mr},
    {'Model': 'Ensemble (WBF)', 'mAP@0.5': ensemble_stats[1],     'mAP@0.5:0.95': ensemble_stats[0],   'Precision': None,              'Recall': None},
])

print('=' * 65)
print('              FINAL RESULTS SUMMARY')
print('=' * 65)
print(summary.to_string(index=False, float_format='{:.4f}'.format))
print('=' * 65)

# Signed difference between the ensemble and the better single model, times 100.
best_single = max(metrics_v5.box.map50, metrics_v8.box.map50)
gain = (ensemble_stats[1] - best_single) * 100
# `:+` prints the real sign; a hard-coded '+' rendered a regression as '+-x.xx%'.
print(f'\nüöÄ Ensemble improvement over best single model: {gain:+.2f}% mAP@0.5')
print(f'   No additional training required ‚Äî inference-time fusion only.')