# 11 — YOLOv8 Baseline Comparison

**Purpose:** Train a YOLOv8 model on the same AGAR `total` dataset to serve as
an independent architecture comparison (reviewer concern: only Detectron2 models).

**Plan:**
- Convert COCO annotations to YOLO format
- Train YOLOv8m (medium) with 3 seeds for variance
- Evaluate on the same test set and compare with Detectron2 best model

**Run on:** Google Colab with GPU

In [None]:
import os
import sys

from google.colab import drive

# Attach Google Drive so the project folder is reachable under /content/drive.
drive.mount('/content/drive')

# Put the project root at the front of sys.path exactly once, so re-running
# this cell never adds a duplicate entry.
# NOTE(review): presumably this is where the `config` module imported later
# lives — confirm against the Drive layout.
PROJECT_ROOT = '/content/drive/MyDrive/TESE'
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.insert(0, PROJECT_ROOT)

In [None]:
# Install the `ultralytics` package (source of the `YOLO` class imported below);
# `-q` keeps pip's output quiet.
!pip install ultralytics -q

In [None]:
import json
import shutil
import datetime
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict
from ultralytics import YOLO

import config
from config import AGAR_IMG_DIR, AGAR_DATASETS, OUTPUTS_DIR, RESULTS_DIR

# Echo the locations read from `config` so a wrong path is spotted before any
# conversion or training work starts.
print(
    f"AGAR image dir: {AGAR_IMG_DIR}",
    f"Outputs dir: {OUTPUTS_DIR}",
    sep="\n",
)

## Step 1: Convert COCO annotations to YOLO format

YOLO expects:
```
dataset/
  images/
    train/  val/  test/
  labels/
    train/  val/  test/
  data.yaml
```

Each label `.txt` file has one line per object: `class_id x_center y_center width height` (normalized 0-1).

In [None]:
def coco_to_yolo(coco_json_path: str, output_labels_dir: str, output_images_dir: str,
                 source_images_dir: str, copy_images: bool = True) -> int:
    """
    Convert a COCO-format annotation file to YOLO-format label files.

    Writes one ``.txt`` file per image entry in the COCO JSON (an empty file
    when the image has no usable annotation). Each line is
    ``class_id x_center y_center width height`` with the four box values
    normalized to [0, 1] by the image width/height.

    Boxes are clipped to the image rectangle *before* the center and size are
    computed, so a box that overhangs an image edge keeps exactly its visible
    part. Boxes left with no area inside the image are skipped.

    Args:
        coco_json_path: Path to COCO JSON annotation file
        output_labels_dir: Directory to write YOLO .txt label files
        output_images_dir: Directory that receives symlinks to the images
        source_images_dir: Directory containing the source images
        copy_images: If True, symlink each existing source image into
            ``output_images_dir`` (no image bytes are copied); False skips
            the image step entirely

    Returns:
        Number of image entries processed

    Raises:
        KeyError: If an annotation references a ``category_id`` that is not
            listed under ``categories``.
    """
    os.makedirs(output_labels_dir, exist_ok=True)
    os.makedirs(output_images_dir, exist_ok=True)

    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)

    # Build category mapping: COCO category_id -> 0-indexed class_id
    # (ids are sorted so the mapping is stable across splits).
    cat_ids = sorted(c['id'] for c in coco['categories'])
    cat_map = {cid: idx for idx, cid in enumerate(cat_ids)}

    # Build image lookup
    img_lookup = {img['id']: img for img in coco['images']}

    # Group annotations by image
    anns_by_img = defaultdict(list)
    for ann in coco['annotations']:
        anns_by_img[ann['image_id']].append(ann)

    count = 0
    for img_id, img_info in img_lookup.items():
        img_w = img_info['width']
        img_h = img_info['height']
        file_name = img_info['file_name']
        stem = os.path.splitext(file_name)[0]
        # file_name may carry a relative sub-directory; mirror it on output.
        has_subdir = bool(os.path.dirname(file_name))

        label_lines = []
        for ann in anns_by_img.get(img_id, []):
            # COCO bbox: [x, y, width, height] (top-left corner)
            x, y, w, h = ann['bbox']

            # Clip the corners to the image first. Clamping the center and
            # the size independently would shift/stretch overhanging boxes.
            x_min = max(0.0, x)
            y_min = max(0.0, y)
            x_max = min(float(img_w), x + w)
            y_max = min(float(img_h), y + h)
            box_w = x_max - x_min
            box_h = y_max - y_min
            if box_w <= 0 or box_h <= 0:
                # Nothing of this box lies inside the image.
                continue

            # Convert to YOLO: center_x, center_y, w, h (normalized)
            cx = (x_min + box_w / 2) / img_w
            cy = (y_min + box_h / 2) / img_h
            nw = box_w / img_w
            nh = box_h / img_h

            class_id = cat_map[ann['category_id']]
            label_lines.append(f"{class_id} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}\n")

        # Write YOLO label file (always, even when empty)
        label_path = os.path.join(output_labels_dir, f"{stem}.txt")
        if has_subdir:
            os.makedirs(os.path.dirname(label_path), exist_ok=True)
        with open(label_path, 'w') as lf:
            lf.writelines(label_lines)

        # Symlink the image next to its label
        if copy_images:
            src = os.path.join(source_images_dir, file_name)
            dst = os.path.join(output_images_dir, file_name)
            if os.path.exists(src):
                if has_subdir:
                    os.makedirs(os.path.dirname(dst), exist_ok=True)
                # os.path.exists() is False for a dangling link, which would
                # make os.symlink() fail on re-runs; replace stale links.
                if os.path.islink(dst) and not os.path.exists(dst):
                    os.remove(dst)
                if not os.path.lexists(dst):
                    os.symlink(src, dst)

        count += 1

    return count

print("Conversion function defined.")

In [None]:
# ── Materialize the AGAR 'total' dataset in YOLO layout ──
YOLO_DATASET_DIR = os.path.join(OUTPUTS_DIR, "yolo_agar_total")

total_paths = AGAR_DATASETS['total']

# Resolve all three annotation files up front so a missing split is reported
# before any conversion work is done.
split_jsons = [(name, total_paths[name]) for name in ('train', 'val', 'test')]

for split, json_path in split_jsons:
    # Each split gets its own labels/<split> and images/<split> folder.
    labels_dir = os.path.join(YOLO_DATASET_DIR, 'labels', split)
    images_dir = os.path.join(YOLO_DATASET_DIR, 'images', split)

    n = coco_to_yolo(json_path, labels_dir, images_dir, AGAR_IMG_DIR, copy_images=True)
    print(f"{split}: converted {n} images")

# Class names come from the train annotations, ordered by category id — the
# same ordering coco_to_yolo uses when it assigns 0-based class ids.
with open(total_paths['train'], 'r') as f:
    coco_data = json.load(f)
categories_by_id = sorted(coco_data['categories'], key=lambda c: c['id'])
class_names = [category['name'] for category in categories_by_id]
print(f"Classes: {class_names}")

In [None]:
# ── Write the dataset description file (data.yaml) for YOLO ──
import yaml

yaml_path = os.path.join(YOLO_DATASET_DIR, 'data.yaml')

# Split folders are given relative to 'path'; 'nc' and 'names' describe the
# classes in the same order used for the label class ids.
data_yaml = dict(
    path=YOLO_DATASET_DIR,
    train='images/train',
    val='images/val',
    test='images/test',
    nc=len(class_names),
    names=class_names,
)

with open(yaml_path, 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)
print(f"Saved data.yaml to: {yaml_path}")

# Read the file back and show it, so what is printed is what is on disk.
print(f"\nContents:")
with open(yaml_path, 'r') as f:
    written_yaml = f.read()
print(written_yaml)

## Step 2: Train YOLOv8 with multiple seeds

Using YOLOv8m (medium) for a fair comparison with R50/R101 backbone models.

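Hyperparameters are chosen to be as comparable as possible to the Detectron2 config:
- Same batch size (8)
- Training length: up to 100 epochs with early stopping (patience 20); the Detectron2 runs used ~2800 iterations (≈ ~10 epochs on this dataset), so the training budgets are not identical
- Image resolution differs: YOLO default 640 vs. Detectron2 short-side 800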

In [None]:
# ── Training configuration ──
SEEDS = [42, 123, 456]
YOLO_MODEL = 'yolov8m.pt'  # medium: ~25M params, comparable to R50
EPOCHS = 100  # YOLO uses epochs, not iterations; EarlyStopping will stop if plateaued
BATCH_SIZE = 8
IMG_SIZE = 640  # YOLO default; Detectron2 uses short-side 800
PATIENCE = 20  # Early stopping patience

# Where to save outputs
YOLO_OUTPUTS_DIR = os.path.join(OUTPUTS_DIR, 'yolo_runs')
os.makedirs(YOLO_OUTPUTS_DIR, exist_ok=True)

print(f"Model: {YOLO_MODEL}")
print(f"Epochs: {EPOCHS} (with early stopping patience={PATIENCE})")
print(f"Batch size: {BATCH_SIZE}")
print(f"Seeds: {SEEDS}")
print(f"Output dir: {YOLO_OUTPUTS_DIR}")

In [None]:
# ── Train with multiple seeds ──
import torch

yolo_results = []

for seed in SEEDS:
    print(f"\n{'='*60}")
    print(f"Training YOLOv8m | Seed: {seed}")
    print(f"{'='*60}")
    
    run_name = f"yolov8m_agar_total_seed{seed}"
    
    # Load pretrained model
    model = YOLO(YOLO_MODEL)
    
    # Train
    results = model.train(
        data=yaml_path,
        epochs=EPOCHS,
        batch=BATCH_SIZE,
        imgsz=IMG_SIZE,
        seed=seed,
        patience=PATIENCE,
        project=YOLO_OUTPUTS_DIR,
        name=run_name,
        exist_ok=True,
        # Comparable settings
        lr0=0.01,       # YOLO default initial LR
        lrf=0.01,       # final LR = lr0 * lrf
        momentum=0.937, # YOLO default SGD momentum
        weight_decay=0.0005,
        warmup_epochs=3,
        warmup_momentum=0.8,
        warmup_bias_lr=0.1,
        # Augmentation
        hsv_h=0.015,
        hsv_s=0.7,
        hsv_v=0.4,
        degrees=0.0,
        translate=0.1,
        scale=0.5,
        fliplr=0.5,
        mosaic=1.0,
        # Save settings
        save=True,
        save_period=-1,  # save only best and last
        plots=True,
        verbose=True,
    )
    
    # Store the output directory
    run_dir = os.path.join(YOLO_OUTPUTS_DIR, run_name)
    print(f"\nTraining complete. Output: {run_dir}")
    
    # Clean up GPU memory
    del model
    torch.cuda.empty_cache()
    
    yolo_results.append({
        'seed': seed,
        'run_name': run_name,
        'run_dir': run_dir,
    })
    
    # Save intermediate (in case Colab disconnects)
    intermediate_path = os.path.join(RESULTS_DIR, 'yolo_training_partial.json')
    os.makedirs(RESULTS_DIR, exist_ok=True)
    with open(intermediate_path, 'w') as f:
        json.dump(yolo_results, f, indent=2)

print(f"\n\nAll {len(SEEDS)} YOLO training runs complete!")

## Step 3: Evaluate on the test set

In [None]:
# ── Evaluate each trained model on the test set ──
test_results = []

for entry in yolo_results:
    seed = entry['seed']
    run_dir = entry['run_dir']
    weights_path = os.path.join(run_dir, 'weights', 'best.pt')
    
    print(f"\nEvaluating seed {seed}: {weights_path}")
    
    model = YOLO(weights_path)
    
    # Validate on test split
    metrics = model.val(
        data=yaml_path,
        split='test',
        batch=BATCH_SIZE,
        imgsz=IMG_SIZE,
        conf=0.001,   # low conf for mAP calculation
        iou=0.5,      # IoU threshold for matching
        max_det=100,  # match Detectron2 setting
        plots=True,
        save_json=True,  # save COCO-format predictions for comparison
        project=YOLO_OUTPUTS_DIR,
        name=f"test_seed{seed}",
        exist_ok=True,
    )
    
    # Extract metrics
    # YOLO metrics.box gives mAP50, mAP50-95, precision, recall
    result = {
        'seed': seed,
        'mAP50': float(metrics.box.map50) * 100,       # convert to percentage
        'mAP50_95': float(metrics.box.map) * 100,       # COCO-style mAP
        'mAP75': float(metrics.box.map75) * 100,
        'precision': float(metrics.box.mp) * 100,
        'recall': float(metrics.box.mr) * 100,
        'weights': weights_path,
    }
    
    # Per-class AP50
    for i, name in enumerate(class_names):
        if i < len(metrics.box.ap50):
            result[f'AP50_{name}'] = float(metrics.box.ap50[i]) * 100
    
    test_results.append(result)
    print(f"  mAP50={result['mAP50']:.1f}  mAP50-95={result['mAP50_95']:.1f}")
    
    del model
    torch.cuda.empty_cache()

print("\nAll evaluations complete!")

In [None]:
# ── Summary table ──
df_yolo = pd.DataFrame(test_results)

print("=== YOLOv8m Results (per seed) ===")
print(df_yolo[['seed', 'mAP50', 'mAP50_95', 'mAP75', 'precision', 'recall']].to_string(index=False))

print("\n=== YOLOv8m Summary (mean ± std) ===")
for metric in ['mAP50', 'mAP50_95', 'mAP75', 'precision', 'recall']:
    mean_val = df_yolo[metric].mean()
    std_val = df_yolo[metric].std()
    print(f"  {metric}: {mean_val:.1f} ± {std_val:.1f}")

# Save
yolo_test_path = os.path.join(RESULTS_DIR, 'yolo_test_results.json')
with open(yolo_test_path, 'w') as f:
    json.dump(test_results, f, indent=2)

csv_path = os.path.join(RESULTS_DIR, 'yolo_test_results.csv')
df_yolo.to_csv(csv_path, index=False)

print(f"\nSaved to: {yolo_test_path}")
print(f"Saved to: {csv_path}")

## Step 4: Cross-architecture comparison

In [None]:
import matplotlib.pyplot as plt

# ── Load Detectron2 multi-seed results (if available) ──
d2_results_path = os.path.join(RESULTS_DIR, 'multi_seed_results.json')

if not os.path.exists(d2_results_path):
    print(f"Detectron2 multi-seed results not found at: {d2_results_path}")
    print("Run notebook 10 first, then re-run this cell.")
else:
    with open(d2_results_path, 'r') as f:
        d2_results = json.load(f)

    # One row per (model, seed): Detectron2 rows first, then the YOLOv8 rows
    # with their metric names mapped onto the same AP / AP50 / AP75 columns.
    comparison = [
        {
            'Model': r['model'].replace('total_', ''),
            'Framework': 'Detectron2',
            'Seed': r['seed'],
            'AP': r['AP'],
            'AP50': r['AP50'],
            'AP75': r['AP75'],
        }
        for r in d2_results
    ]
    comparison.extend(
        {
            'Model': 'YOLOv8m',
            'Framework': 'Ultralytics',
            'Seed': r['seed'],
            'AP': r['mAP50_95'],
            'AP50': r['mAP50'],
            'AP75': r['mAP75'],
        }
        for r in test_results
    )

    df_comp = pd.DataFrame(comparison)
    models = df_comp['Model'].unique()
    compared_metrics = ['AP', 'AP50', 'AP75']
    # Slice the table once per model and reuse the slices below.
    rows_by_model = {name: df_comp[df_comp['Model'] == name] for name in models}

    # Text summary
    print("=== Cross-Architecture Comparison (mean ± std) ===")
    for model, sub in rows_by_model.items():
        fw = sub['Framework'].iloc[0]
        print(f"\n{model} ({fw}):")
        for m in compared_metrics:
            mean_val = sub[m].mean()
            std_val = sub[m].std()
            print(f"  {m}: {mean_val:.1f} ± {std_val:.1f}")

    # ── Grouped bar chart: one panel per metric, one bar per model ──
    fig, axes = plt.subplots(1, 3, figsize=(16, 5))
    colors = ['#3878a2', '#e2a83e', '#59a864']
    bar_positions = range(len(models))

    for ax, metric in zip(axes, compared_metrics):
        per_model_values = [rows_by_model[name][metric] for name in models]
        means = [series.mean() for series in per_model_values]
        stds = [series.std() for series in per_model_values]

        bars = ax.bar(bar_positions, means, yerr=stds, capsize=5,
                      color=colors[:len(models)], edgecolor='black', linewidth=0.5)

        # Overlay the individual per-seed values on top of each bar.
        for i, series in enumerate(per_model_values):
            vals = series.values
            ax.scatter([i] * len(vals), vals, color='black', s=30, zorder=5)

        ax.set_xticks(bar_positions)
        ax.set_xticklabels(models, rotation=20, ha='right')
        ax.set_ylabel(metric)
        ax.set_title(f'{metric} Comparison')
        ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plot_path = os.path.join(RESULTS_DIR, 'cross_architecture_comparison.png')
    plt.savefig(plot_path, dpi=300, bbox_inches='tight')
    print(f"\nSaved plot: {plot_path}")
    plt.show()

    # Save the combined per-seed table
    comp_path = os.path.join(RESULTS_DIR, 'cross_architecture_comparison.csv')
    df_comp.to_csv(comp_path, index=False)
    print(f"Saved: {comp_path}")

## Summary

This notebook produces:
1. **YOLO-format dataset** at `outputs_detectron2/yolo_agar_total/`
2. **3 trained YOLOv8m models** at `outputs_detectron2/yolo_runs/yolov8m_agar_total_seed{42,123,456}/`
3. **Test evaluations** saved to `results/yolo_test_results.{json,csv}`
4. **Cross-architecture comparison** saved to `results/cross_architecture_comparison.{csv,png}`

Key comparison point for the paper:
- If Detectron2 AP ≈ YOLO AP → findings are architecture-independent (strong claim)
- If Detectron2 AP > YOLO AP → justify why (anchor-free vs anchor-based, resolution, augmentation)
- If YOLO AP > Detectron2 AP → discuss and acknowledge