# 03 - Evaluación del Modelo

Evaluación detallada del modelo entrenado.

## Contenido
1. Cargar modelo
2. Ejecutar evaluación en validación
3. Analizar predicciones por confianza
4. Visualizar predicciones
5. Análisis de errores
6. Velocidad de inferencia

In [None]:
import sys
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image

# Resolve the project root (the parent of the current working directory) and
# make it importable. The membership guard keeps re-runs of this cell from
# stacking duplicate entries on sys.path.
PROJECT_ROOT = Path('..').resolve()
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from ultralytics import YOLO

# Project directories used by the rest of the notebook.
MODELS_DIR = PROJECT_ROOT / 'models'
DATA_DIR = PROJECT_ROOT / 'data'

# List every entry in the models directory with its size on disk.
# Entries are sorted so the listing is stable between runs. For a directory
# entry the size is the sum of the files it contains, because stat().st_size
# of the directory itself does not reflect its contents.
print("Modelos disponibles:")
for m in sorted(MODELS_DIR.glob('*')):
    if m.is_dir():
        size_bytes = sum(f.stat().st_size for f in m.rglob('*') if f.is_file())
    else:
        size_bytes = m.stat().st_size
    size_mb = size_bytes / (1024 * 1024)
    print(f"  - {m.name} ({size_mb:.1f} MB)")

## 1. Cargar Modelo

In [None]:
# Select and load a .pt checkpoint from the models directory.
# The candidate whose filename sorts last (reverse lexicographic order) is
# the one that gets loaded.
pt_models = sorted(MODELS_DIR.glob('*.pt'), reverse=True)
if not pt_models:
    # Fail fast: every later cell needs `model` and `model_path`, so stopping
    # here gives a clear error instead of a NameError further down.
    raise FileNotFoundError(f"No se encontraron modelos .pt en {MODELS_DIR}")

model_path = pt_models[0]
print(f"Cargando: {model_path.name}")
model = YOLO(str(model_path))
print("Modelo cargado correctamente")

## 2. Ejecutar Evaluación en Validación

In [None]:
# Run the model's validation routine on the 'val' split of the dataset
# described by pillar.yaml; the returned metrics are kept in `results`.
data_yaml = DATA_DIR / 'pillar.yaml'

val_options = {
    'data': str(data_yaml),
    'split': 'val',
    'conf': 0.001,  # kept low so the evaluation is complete
    'iou': 0.6,
    'verbose': True,
}
results = model.val(**val_options)

In [None]:
# Print the aggregate box metrics held in `results`.
precision = results.box.mp
recall = results.box.mr

print("="*50)
print("MÉTRICAS DE EVALUACIÓN")
print("="*50)
print(f"Precision:  {precision:.4f} ({precision*100:.1f}%)")
print(f"Recall:     {recall:.4f} ({recall*100:.1f}%)")
print(f"mAP@50:     {results.box.map50:.4f} ({results.box.map50*100:.1f}%)")
print(f"mAP@50-95:  {results.box.map:.4f} ({results.box.map*100:.1f}%)")

# F1 is the harmonic mean of precision and recall. It is reported as 0 when
# their sum is 0, so the line is always printed rather than silently
# disappearing in exactly the case where the model performs worst.
pr_sum = precision + recall
f1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0.0
print(f"F1 Score:   {f1:.4f} ({f1*100:.1f}%)")

## 3. Analizar Predicciones por Confianza

In [None]:
# Gather prediction confidences over a sample of validation images and plot
# their distribution.
# sorted() makes the 20-image sample reproducible; glob() alone yields files
# in arbitrary order.
val_images = sorted((DATA_DIR / 'dataset' / 'val' / 'images').glob('*.jpg'))[:20]

# Reference line drawn on the histogram.
threshold = 0.65

all_confidences = []
for img_path in val_images:
    # Stored under its own name so the validation metrics kept in `results`
    # are not overwritten and the metrics cell can still be re-run.
    # NOTE(review): no `conf` is passed here, so the library's default cutoff
    # applies and the histogram only covers detections above it -- confirm
    # that this is intended.
    preds = model(str(img_path), verbose=False)
    if len(preds[0].boxes) > 0:
        confs = preds[0].boxes.conf.cpu().numpy()
        all_confidences.extend(confs)

if all_confidences:
    plt.figure(figsize=(10, 4))
    plt.hist(all_confidences, bins=20, edgecolor='black', alpha=0.7)
    plt.axvline(x=threshold, color='r', linestyle='--', label=f'Threshold ({threshold})')
    plt.xlabel('Confianza')
    plt.ylabel('Frecuencia')
    plt.title('Distribución de Confianza en Predicciones')
    plt.legend()
    plt.show()

    print("\nEstadísticas de confianza:")
    print(f"  Min: {min(all_confidences):.3f}")
    print(f"  Max: {max(all_confidences):.3f}")
    print(f"  Media: {np.mean(all_confidences):.3f}")
    print(f"  Mediana: {np.median(all_confidences):.3f}")
else:
    # Say so explicitly instead of producing no output at all.
    print("No se obtuvieron detecciones en las imágenes de muestra")

## 4. Visualizar Predicciones

In [None]:
def show_predictions(img_path, conf_threshold=0.65):
    """Run the model on one image and display the annotated result.

    Args:
        img_path: Location of the image, as a ``pathlib.Path`` or a string.
        conf_threshold: Value passed to the model as ``conf``.

    Returns:
        The first element of the model's output for this image.
    """
    # Accept plain strings as well: the file name is needed for the title.
    img_path = Path(img_path)
    detections = model(str(img_path), conf=conf_threshold, verbose=False)[0]

    # The rendered image is converted from BGR to RGB before it is handed to
    # matplotlib.
    annotated = cv2.cvtColor(detections.plot(), cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(12, 8))
    plt.imshow(annotated)
    plt.title(f"{img_path.name} - {len(detections.boxes)} detecciones")
    plt.axis('off')
    plt.show()

    return detections

# Display the annotated predictions for the first four sampled images.
sample_images = val_images[:4]
for sample_path in sample_images:
    show_predictions(sample_path)

## 5. Análisis de Errores

In [None]:
def count_annotations(label_path):
    """Count the annotations in a YOLO label file.

    Every line that is not blank or whitespace-only counts as one annotation.

    Args:
        label_path: Path to the label file, as a ``pathlib.Path`` or a string.

    Returns:
        The number of non-blank lines, or 0 when the file does not exist.
    """
    try:
        with open(label_path, encoding='utf-8') as f:
            # Count lazily instead of building a throwaway list.
            return sum(1 for line in f if line.strip())
    except FileNotFoundError:
        # A missing label file means there are no annotations for the image.
        return 0

# Compare the number of predicted boxes with the number of ground-truth
# annotations for each sampled image.
# NOTE: this is a count-based heuristic. Boxes are not matched against the
# ground truth, so an image with one spurious box and one missed object is
# counted as 'correct'.
errors = {'false_positives': [], 'false_negatives': [], 'correct': []}

for img_path in val_images:
    # The label file sits in the sibling 'labels' directory and shares the
    # image's stem.
    label_path = img_path.parent.parent / 'labels' / f"{img_path.stem}.txt"
    gt_count = count_annotations(label_path)

    # Stored under its own name so the validation metrics kept in `results`
    # are not overwritten and the metrics cell can still be re-run.
    preds = model(str(img_path), conf=0.65, verbose=False)
    pred_count = len(preds[0].boxes)

    diff = pred_count - gt_count
    if diff > 0:
        errors['false_positives'].append((img_path.name, diff))
    elif diff < 0:
        errors['false_negatives'].append((img_path.name, -diff))
    else:
        errors['correct'].append(img_path.name)

print(f"Análisis de {len(val_images)} imágenes:")
print(f"  Correctas: {len(errors['correct'])}")
print(f"  Con falsos positivos: {len(errors['false_positives'])}")
print(f"  Con falsos negativos: {len(errors['false_negatives'])}")

In [None]:
# Print up to five example images for each error category that has entries.
# Each row holds: key in `errors`, header line, trailing word for each entry.
report_sections = (
    ('false_negatives', "\nImágenes con detecciones faltantes:", "faltantes"),
    ('false_positives', "\nImágenes con detecciones extra:", "extra"),
)

for key, header, suffix in report_sections:
    entries = errors[key]
    if not entries:
        continue
    print(header)
    for name, count in entries[:5]:
        print(f"  {name}: {count} {suffix}")

## 6. Velocidad de Inferencia

In [None]:
import time
import torch

# Measure mean inference latency on a synthetic 640x640 RGB image.

# Warm-up: a few untimed runs before measuring. The upper bound 256 is
# exclusive, so pixel values cover the full uint8 range.
dummy = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
for _ in range(5):
    model(dummy, verbose=False)

# Query CUDA availability once, outside the timed loop, so the check itself
# does not add to the measured time.
use_cuda = torch.cuda.is_available()
if use_cuda:
    torch.cuda.synchronize()

# Benchmark: 50 timed runs. When CUDA is available, synchronize before
# stopping the clock so pending GPU work is included in the measurement.
times = []
for _ in range(50):
    start = time.perf_counter()
    model(dummy, verbose=False)
    if use_cuda:
        torch.cuda.synchronize()
    times.append(time.perf_counter() - start)

mean_ms = np.mean(times) * 1000
fps = 1000 / mean_ms

print(f"Velocidad de inferencia ({model_path.name}):")
print(f"  Media: {mean_ms:.2f} ms")
print(f"  FPS: {fps:.1f}")