# Evaluate YOLOv8 Detection Models
**Author:** G8  
**Task:** 3.2 - YOLOv8 Evaluation  
**Timeline:** Feb 10, 2025  

**Purpose:**
- Evaluate all 3 YOLOv8 models (variants n, s, and m) on the test split
- Calculate mAP50, mAP50-95, precision, recall
- Measure inference speed (FPS and per-image latency in ms) and model size
- Compare model performance

In [None]:
import os
import json
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from ultralytics import YOLO
import time

print("Libraries imported!")

In [None]:
# Config
# Resolve the project root. When the notebook is started from the
# ``notebooks`` folder itself, the root is one level up; otherwise the current
# working directory is used. Only the *last* path component is compared: a
# substring test on the whole path would also match unrelated ancestors
# (e.g. ``/home/me/notebooks-old/project``) and pick the wrong root.
PROJECT_ROOT = Path.cwd().parent if Path.cwd().name == 'notebooks' else Path.cwd()
DATA_YAML = PROJECT_ROOT / "data" / "data.yaml"
MODELS_PATH = PROJECT_ROOT / "models" / "detection"
RESULTS_PATH = PROJECT_ROOT / "results" / "detection"

# Make sure every output folder exists before anything is written to it.
for subdir in ('metrics', 'visualizations', 'predictions'):
    (RESULTS_PATH / subdir).mkdir(parents=True, exist_ok=True)

print(f"Results path: {RESULTS_PATH}")

## Evaluate Each Model

In [None]:
def _benchmark_inference(model, images, num_runs=50, warmup_runs=5):
    """Time single-image inference and return ``(fps, ms_per_image)``.

    Args:
        model: Callable model; invoked as ``model(image, verbose=False)``.
        images: Sequence of image paths. Timed runs cycle through it.
        num_runs: Number of timed inference calls.
        warmup_runs: Untimed calls on the first image made before timing.

    Returns:
        Tuple ``(fps, ms_per_image)``. Both are ``nan`` when ``images`` is
        empty, so callers always receive numeric values.
    """
    if not images:
        return float("nan"), float("nan")

    # Warm-up calls are excluded from the measurement.
    for _ in range(warmup_runs):
        model(images[0], verbose=False)

    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted.
    start = time.perf_counter()
    for run in range(num_runs):
        model(images[run % len(images)], verbose=False)
    elapsed = time.perf_counter() - start

    return num_runs / elapsed, (elapsed / num_runs) * 1000


def evaluate_yolo_model(variant):
    """Evaluate one YOLOv8 model on the test split and save its metrics.

    Loads ``yolov8{variant}_best.pt`` from ``MODELS_PATH``, runs
    ``model.val`` on the ``test`` split of ``DATA_YAML``, benchmarks
    single-image inference, prints a summary and writes the metrics to
    ``RESULTS_PATH / 'metrics' / yolov8{variant}_metrics.json``.

    Args:
        variant: Model variant suffix used in the weight file name
            (the notebook passes ``'n'``, ``'s'`` and ``'m'``).

    Returns:
        dict with keys ``model``, ``mAP50``, ``mAP50_95``, ``precision``,
        ``recall``, ``inference_fps``, ``inference_ms`` and
        ``model_size_mb``. The two speed entries are ``nan`` when no test
        images are found (they are stored as ``null`` in the JSON file).
    """
    import math  # local import: only needed for the NaN check below

    print("\n" + "="*80)
    print(f"EVALUATING YOLOV8{variant.upper()}")
    print("="*80)

    # Load model
    model_path = MODELS_PATH / f"yolov8{variant}_best.pt"
    print(f"\nLoading: {model_path.name}")
    model = YOLO(model_path)

    # Run validation
    # NOTE(review): conf=0.25 is much higher than the Ultralytics validation
    # default; confirm this threshold is intended for the reported mAP.
    print("\nRunning validation...")
    metrics = model.val(
        data=str(DATA_YAML),
        split='test',
        save_json=True,
        conf=0.25,
        iou=0.6,
        plots=True
    )

    # Extract metrics (cast to plain floats so they are JSON-serializable)
    results = {
        'model': f'yolov8{variant}',
        'mAP50': float(metrics.box.map50),
        'mAP50_95': float(metrics.box.map),
        'precision': float(metrics.box.mp),
        'recall': float(metrics.box.mr),
    }

    # Measure speed
    print("\nMeasuring inference speed...")
    test_imgs = list((PROJECT_ROOT / "data" / "multi_objects" / "images" / "test").glob("*.jpg"))

    # The speed keys are always set so the summary below and the comparison
    # table never hit a KeyError when the test-image folder is empty.
    fps, latency_ms = _benchmark_inference(model, test_imgs)
    results['inference_fps'] = float(fps)
    results['inference_ms'] = float(latency_ms)
    speed_measured = not math.isnan(results['inference_fps'])

    # Model size
    results['model_size_mb'] = float(os.path.getsize(model_path) / (1024 * 1024))

    # Print summary
    print("\n" + "-"*80)
    print("METRICS SUMMARY")
    print("-"*80)
    print(f"mAP50:      {results['mAP50']:.4f}")
    print(f"mAP50-95:   {results['mAP50_95']:.4f}")
    print(f"Precision:  {results['precision']:.4f}")
    print(f"Recall:     {results['recall']:.4f}")
    if speed_measured:
        print(f"FPS:        {results['inference_fps']:.1f}")
        print(f"Latency:    {results['inference_ms']:.2f} ms")
    else:
        print("FPS:        N/A (no test images found)")
        print("Latency:    N/A (no test images found)")
    print(f"Model size: {results['model_size_mb']:.2f} MB")

    # Save metrics. NaN is not valid JSON, so unmeasured values become null.
    serializable = {
        key: (None if isinstance(value, float) and math.isnan(value) else value)
        for key, value in results.items()
    }
    json_path = RESULTS_PATH / 'metrics' / f"yolov8{variant}_metrics.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(serializable, f, indent=2)
    print(f"\nMetrics saved: {json_path.name}")

    return results

# Run the evaluation for each trained variant and collect the metric dicts
# in evaluation order (n, s, m).
print("="*80, "EVALUATION PIPELINE", "="*80, sep="\n")

all_metrics = []
for variant in ('n', 's', 'm'):
    all_metrics.append(evaluate_yolo_model(variant))

## Model Comparison

In [None]:
# Build a ranked comparison table from the per-model metric dicts.
print("="*80, "MODEL COMPARISON", "="*80, sep="\n")

# Column order of the final table: rank first, then identity, accuracy,
# speed and size.
cols = ['rank', 'model', 'mAP50', 'mAP50_95', 'precision', 'recall',
        'inference_fps', 'inference_ms', 'model_size_mb']

# Sort best-first by mAP50, number the rows 1..N, then reorder the columns.
df_comp = (
    pd.DataFrame(all_metrics)
    .sort_values('mAP50', ascending=False)
    .assign(rank=lambda frame: range(1, len(frame) + 1))
    .loc[:, cols]
)

print(f"\n{df_comp.to_string(index=False)}")

# Persist the table as an Excel sheet next to the per-model JSON files.
excel_path = RESULTS_PATH / 'metrics' / 'yolo_comparison.xlsx'
df_comp.to_excel(excel_path, index=False, sheet_name='YOLOv8_Comparison')
print(f"\nSaved: {excel_path.name}")

# The first row is the model with the highest mAP50.
best = df_comp.iloc[0]
print("\n" + "="*80, "BEST DETECTION MODEL", "="*80, sep="\n")
print(f"Model: {best['model']}")
print(f"mAP50: {best['mAP50']:.4f}")
print(f"FPS: {best['inference_fps']:.1f}")

## Visualization

In [None]:
# Comparison charts: accuracy on the left, inference speed on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: mAP50 per model, with the 0.85 target drawn as a dashed line.
bars1 = ax1.barh(df_comp['model'], df_comp['mAP50'])
ax1.set_xlabel('mAP50', fontsize=12)
ax1.set_title('Detection Accuracy (mAP50)', fontsize=14, fontweight='bold')
ax1.axvline(x=0.85, color='r', linestyle='--', label='Target (0.85)')
ax1.legend()
ax1.grid(True, alpha=0.3, axis='x')

# Right panel: frames per second per model.
bars2 = ax2.barh(df_comp['model'], df_comp['inference_fps'])
ax2.set_xlabel('FPS (Frames Per Second)', fontsize=12)
ax2.set_title('Inference Speed', fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3, axis='x')

# Write each bar's value at its tip: three decimals for mAP50, one for FPS.
for axis, bars, number_format in ((ax1, bars1, '.3f'), (ax2, bars2, '.1f')):
    for bar in bars:
        value = bar.get_width()
        y_center = bar.get_y() + bar.get_height()/2
        axis.text(value, y_center, format(value, number_format),
                  ha='left', va='center')

plt.tight_layout()
plt.savefig(RESULTS_PATH / 'metrics' / 'yolo_comparison.png', dpi=150)
plt.show()

print("Comparison chart saved")

## Task 3.2 Complete!

**Next:** Task 4 - Build Streamlit application (Kevin)