## 1. Setup and Load Models

In [None]:
import warnings
warnings.filterwarnings('ignore')

from ultralytics import YOLO
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json

# Plot styling shared by every figure in this notebook
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)

# Locations of the trained checkpoints
MODELS_DIR = Path('models')
DETECTION_WEIGHTS = MODELS_DIR / 'detector' / 'flower_detector' / 'weights' / 'best.pt'
CLASSIFIER_WEIGHTS = MODELS_DIR / 'classifier' / 'flower_classifier' / 'weights' / 'best.pt'

# Fail fast with a readable message when a checkpoint is missing, so the
# problem is reported here (with the exact path) instead of surfacing from
# inside the model loader.
for weights_path in (DETECTION_WEIGHTS, CLASSIFIER_WEIGHTS):
    if not weights_path.is_file():
        raise FileNotFoundError(f"Trained weights not found: {weights_path}")

# Load models
print("Loading trained models...")
detector = YOLO(str(DETECTION_WEIGHTS))
classifier = YOLO(str(CLASSIFIER_WEIGHTS))
print("✓ Models loaded")

## 2. Detection Model Metrics

In [None]:
# Run validation for the detector against the detection dataset config
print("Evaluating Detection Model...\n")
det_metrics = detector.val(data='data/detection/dataset.yaml')

# Collect the headline box metrics as plain Python floats
box_results = det_metrics.box
metrics_dict = {
    'mAP@0.5': float(box_results.map50),
    'mAP@0.5-0.95': float(box_results.map),
    'Precision': float(box_results.mp),
    'Recall': float(box_results.mr),
}

# Bar chart of the metrics, colour-coded against the 0.85 / 0.75 thresholds
fig, ax = plt.subplots(figsize=(10, 6))
metrics_names = list(metrics_dict)
metrics_values = [metrics_dict[name] for name in metrics_names]

colors = []
for score in metrics_values:
    if score > 0.85:
        colors.append('green')
    elif score > 0.75:
        colors.append('orange')
    else:
        colors.append('red')
bars = ax.bar(metrics_names, metrics_values, color=colors, alpha=0.7, edgecolor='black')

# Write each value just above its bar, centred horizontally
for rect, score in zip(bars, metrics_values):
    label_x = rect.get_x() + rect.get_width()/2.
    ax.text(label_x, rect.get_height(), f'{score:.4f}',
            ha='center', va='bottom', fontweight='bold')

ax.set_ylim(0, 1.0)
ax.axhline(y=0.85, color='g', linestyle='--', alpha=0.5, label='Target (0.85)')
ax.set_ylabel('Score', fontsize=12)
ax.set_title('Detection Model Performance Metrics', fontsize=14, fontweight='bold')
ax.legend()
plt.tight_layout()
plt.show()

# Text summary using the same thresholds as the chart colours
print("Detection Model Metrics:")
for metric, value in metrics_dict.items():
    if value > 0.85:
        status = "✓"
    elif value > 0.75:
        status = "⚠"
    else:
        status = "✗"
    print(f"  {status} {metric:<20} {value:.4f}")

## 3. Classification Model Metrics

In [None]:
# Run validation for the classifier on the classification dataset folder
print("Evaluating Classification Model...\n")
clf_metrics = classifier.val(data='data/classification')

# Keep the two accuracy figures as plain Python floats
top1_accuracy = float(clf_metrics.top1)
top5_accuracy = float(clf_metrics.top5)
clf_dict = {
    'Top-1 Accuracy': top1_accuracy,
    'Top-5 Accuracy': top5_accuracy,
}

# Bar chart of the accuracies, colour-coded against the 0.85 / 0.75 thresholds
fig, ax = plt.subplots(figsize=(10, 6))
clf_names = list(clf_dict)
clf_values = [clf_dict[name] for name in clf_names]

colors = []
for accuracy in clf_values:
    if accuracy > 0.85:
        bar_color = 'green'
    elif accuracy > 0.75:
        bar_color = 'orange'
    else:
        bar_color = 'red'
    colors.append(bar_color)
bars = ax.bar(clf_names, clf_values, color=colors, alpha=0.7, edgecolor='black')

# Label every bar with its value, centred on top of the bar
for rect, accuracy in zip(bars, clf_values):
    centre_x = rect.get_x() + rect.get_width()/2.
    ax.text(centre_x, rect.get_height(), f'{accuracy:.4f}',
            ha='center', va='bottom', fontweight='bold')

ax.set_ylim(0, 1.0)
ax.axhline(y=0.85, color='g', linestyle='--', alpha=0.5, label='Target (0.85)')
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_title('Classification Model Performance', fontsize=14, fontweight='bold')
ax.legend()
plt.tight_layout()
plt.show()

# Text summary using the same thresholds as the chart colours
print("Classification Model Metrics:")
for metric, value in clf_dict.items():
    if value > 0.85:
        status = "✓"
    elif value > 0.75:
        status = "⚠"
    else:
        status = "✗"
    print(f"  {status} {metric:<20} {value:.4f}")

## 4. Inference Speed Analysis

In [None]:
import time
from pathlib import Path


def _time_inference(model, image_paths, imgsz):
    """Return per-image ``model.predict`` latencies in milliseconds.

    One untimed prediction is run first so that any one-off initialisation
    performed on a model's first call is not counted as inference time.
    An empty ``image_paths`` yields an empty list.
    """
    image_paths = list(image_paths)
    if not image_paths:
        return []

    # Warm-up call: result discarded, not timed
    model.predict(source=str(image_paths[0]), imgsz=imgsz, verbose=False)

    latencies_ms = []
    for img_path in image_paths:
        # perf_counter is monotonic and high-resolution, unlike time.time()
        start = time.perf_counter()
        model.predict(source=str(img_path), imgsz=imgsz, verbose=False)
        latencies_ms.append((time.perf_counter() - start) * 1000)  # Convert to ms
    return latencies_ms


# Get sample images (sorted so the same files are timed on every run)
test_dir = Path('data/detection/images/val')
test_images = sorted(test_dir.glob('*.jpg'))[:10]
if not test_images:
    # Without images every statistic below would be NaN; stop with a clear error
    raise FileNotFoundError(f"No .jpg images found in {test_dir}")

print("Measuring inference speed...\n")

# Test detection model
det_times = _time_inference(detector, test_images, imgsz=640)
det_avg = np.mean(det_times)
det_std = np.std(det_times)

# Test classification (on the first five sample images)
clf_times = _time_inference(classifier, test_images[:5], imgsz=224)
clf_avg = np.mean(clf_times)
clf_std = np.std(clf_times)

# Visualize
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Detection timing
axes[0].hist(det_times, bins=10, color='skyblue', edgecolor='black', alpha=0.7)
axes[0].axvline(det_avg, color='red', linestyle='--', linewidth=2, label=f'Mean: {det_avg:.2f}ms')
axes[0].set_xlabel('Inference Time (ms)', fontsize=12)
axes[0].set_ylabel('Frequency', fontsize=12)
axes[0].set_title('Detection Model Inference Time Distribution', fontweight='bold')
axes[0].legend()
axes[0].grid(alpha=0.3)

# Classification timing
axes[1].hist(clf_times, bins=5, color='lightcoral', edgecolor='black', alpha=0.7)
axes[1].axvline(clf_avg, color='red', linestyle='--', linewidth=2, label=f'Mean: {clf_avg:.2f}ms')
axes[1].set_xlabel('Inference Time (ms)', fontsize=12)
axes[1].set_ylabel('Frequency', fontsize=12)
axes[1].set_title('Classification Model Inference Time', fontweight='bold')
axes[1].legend()
axes[1].grid(alpha=0.3)

plt.tight_layout()
plt.show()

# Report the averages against the latency targets
print("Detection Model:")
print(f"  Average: {det_avg:.2f}ms")
print(f"  Std Dev: {det_std:.2f}ms")
print(f"  FPS: {1000/det_avg:.1f}")
print(f"  Target: <33ms (30 FPS) {'✓' if det_avg < 33 else '✗'}")

print("\nClassification Model:")
print(f"  Average: {clf_avg:.2f}ms")
print(f"  Std Dev: {clf_std:.2f}ms")
print(f"  FPS: {1000/clf_avg:.1f}")
print(f"  Target: <10ms {'✓' if clf_avg < 10 else '⚠'}")

## 5. Sample Predictions Visualization

In [None]:
# Visualize detector predictions on up to six validation images (2x3 grid)
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# Sorted so the same images are shown on every run (glob order is arbitrary)
test_images = sorted(Path('data/detection/images/val').glob('*.jpg'))[:6]

for ax, img_path in zip(axes, test_images):
    results = detector.predict(source=str(img_path), conf=0.5, verbose=False)

    # Get annotated image
    result = results[0]
    annotated = result.plot()

    ax.imshow(annotated[:, :, ::-1])  # Convert BGR to RGB
    ax.set_title(f'{img_path.name}', fontsize=10)

# Hide the frame and ticks on every panel, including any left empty because
# fewer than six images were found
for ax in axes:
    ax.axis('off')

plt.tight_layout()
plt.show()

print("✓ Sample predictions visualized")

## 6. Summary Report

In [None]:
# Build the evaluation report from the metrics and timings computed above.
# A model is marked PASS when its headline metric exceeds 0.85, else REVIEW.
report_timestamp = pd.Timestamp.now().isoformat()

if metrics_dict['mAP@0.5'] > 0.85:
    detection_status = 'PASS'
else:
    detection_status = 'REVIEW'

if clf_dict['Top-1 Accuracy'] > 0.85:
    classification_status = 'PASS'
else:
    classification_status = 'REVIEW'

summary = {
    'timestamp': report_timestamp,
    'detection_model': {
        'metrics': metrics_dict,
        'inference_time_ms': float(det_avg),
        'fps': float(1000/det_avg),
        'status': detection_status,
    },
    'classification_model': {
        'metrics': clf_dict,
        'inference_time_ms': float(clf_avg),
        'fps': float(1000/clf_avg),
        'status': classification_status,
    },
}

# Persist the report as indented JSON
with open('evaluation_summary.json', 'w') as f:
    f.write(json.dumps(summary, indent=2))

# Console recap of the saved report
det_report = summary['detection_model']
clf_report = summary['classification_model']
rule = "="*60

print("\n" + rule)
print("EVALUATION SUMMARY")
print(rule)
print(f"\nDetection Model: {det_report['status']}")
print(f"  mAP@0.5: {det_report['metrics']['mAP@0.5']:.4f}")
print(f"  Inference: {det_report['inference_time_ms']:.2f}ms ({det_report['fps']:.1f} FPS)")

print(f"\nClassification Model: {clf_report['status']}")
print(f"  Top-1 Accuracy: {clf_report['metrics']['Top-1 Accuracy']:.4f}")
print(f"  Inference: {clf_report['inference_time_ms']:.2f}ms ({clf_report['fps']:.1f} FPS)")

print("\n✓ Summary saved to: evaluation_summary.json")