# 🚗 TFM - Entrenamiento YOLOv8 (Detección Partes Vehiculares)
**Estrategia:** Entrenamiento en 2 fases para alcanzar mAP@0.5 ≥ 0.75 y Precision ≥ 0.75, manteniendo Recall ≥ 0.75.
Fase 1: Regularización fuerte (generalización).  
Fase 2: Fine-tune de precisión (resolución ↑, augment ↓).

In [ ]:
# =============================================
# 1. Inicialización e Instalación
# =============================================
import os, sys, subprocess, json, time, zipfile, math, random, gc
from pathlib import Path

# Packages installed on demand, listed by their pip distribution names.
REQ = ["ultralytics", "pandas", "numpy", "matplotlib", "seaborn", "pyyaml"]
# A distribution name is not always the importable module name: "pyyaml"
# provides the module "yaml". Probing the pip name made that import fail on
# every run, so pip was invoked even when the package was already present.
_IMPORT_NAMES = {"pyyaml": "yaml"}
for p in REQ:
    try:
        __import__(_IMPORT_NAMES.get(p, p))
    except ImportError:
        # Install with the interpreter running this notebook so the package
        # lands in the same environment.
        subprocess.check_call([sys.executable, "-m", "pip", "install", p])

import torch, yaml, pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
from ultralytics import YOLO

# Runtime detection: the notebook treats itself as running on Colab when the
# google.colab package is already loaded in this interpreter.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    BASE_DRIVE = '/content/drive/MyDrive'
    DATA_EXTRACT_DIR = '/content/dataset_extracted'
else:
    BASE_DRIVE = str(Path.home())
    DATA_EXTRACT_DIR = './dataset_extracted'

# ------------ MAIN PARAMETERS (EDITABLE) ------------
MODEL_SIZE = 'm'              # n / s / m / l / x
PHASE1_EPOCHS = 60
PHASE1_PATIENCE = 20
PHASE2_EPOCHS = 35            # patience is expected to stop it earlier
PHASE2_PATIENCE = 8
IMG_SIZE_PHASE1 = 640
IMG_SIZE_PHASE2 = 768         # raised to refine boxes
PROJECT_NAME = 'TFM_Resultados'
DATASET_ZIP_NAME = 'dataset_vehicular.zip'
SEED = 42

# ------------ GENERAL PATHS ------------
DATASET_ZIP_PATH = os.path.join(BASE_DRIVE, 'TFM_Dataset', DATASET_ZIP_NAME)
RESULTS_ROOT = os.path.join(BASE_DRIVE, PROJECT_NAME)
# Make sure both working directories exist before any later cell uses them.
for _required_dir in (DATA_EXTRACT_DIR, RESULTS_ROOT):
    os.makedirs(_required_dir, exist_ok=True)

def set_seed(s=SEED):
    """Seed the Python, NumPy and PyTorch random generators with *s*.

    When CUDA is available, every CUDA device generator is seeded as well.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(s)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(s)


# Seed once at start-up using the default seed.
set_seed()

# Start from the CPU fallback and overwrite it when a CUDA device is present.
GPU_NAME, VRAM_GB = 'CPU', 0
if torch.cuda.is_available():
    GPU_NAME = torch.cuda.get_device_name(0)
    # total_memory is divided by 1024**3 for display in the GB line below.
    VRAM_GB = torch.cuda.get_device_properties(0).total_memory / 1024**3
print(f"🚀 GPU: {GPU_NAME} | VRAM: {VRAM_GB:.1f} GB")

def suggest_batch(name: str):
    """Return a suggested batch size for the GPU called *name*.

    Matching is a case-insensitive substring search, checked in table order;
    names that match no entry get the default of 16.
    """
    gpu = name.lower()
    # (substrings, batch) pairs; the first row with a matching substring wins.
    tiers = (
        (('t4',), 24),
        (('p100',), 32),
        (('v100', 'a100'), 40),
    )
    for tags, batch in tiers:
        if any(tag in gpu for tag in tags):
            return batch
    return 16
# Phase 1 uses the GPU-based suggestion; the phase 2 hint is 4 lower, floored at 8.
BASE_BATCH = suggest_batch(GPU_NAME)
_phase2_batch_hint = max(8, BASE_BATCH - 4)
print(f"📦 Batch sugerido Fase1: {BASE_BATCH}")
print(f"📦 Batch sugerido Fase2: {_phase2_batch_hint}")

In [ ]:
# =============================================
# 2. Dataset: Descompresión + Validación
# =============================================
assert os.path.exists(DATASET_ZIP_PATH), f"ZIP no encontrado: {DATASET_ZIP_PATH}"
print(f"📦 Descomprimiendo dataset: {DATASET_ZIP_PATH}")
with zipfile.ZipFile(DATASET_ZIP_PATH, 'r') as z:
    z.extractall(DATA_EXTRACT_DIR)
print(f"✅ Extraído en: {DATA_EXTRACT_DIR}")

# Locate data.yaml: the first one found while walking the extracted tree.
data_yaml = None
for r, _, files in os.walk(DATA_EXTRACT_DIR):
    if 'data.yaml' in files:
        data_yaml = os.path.join(r, 'data.yaml')
        break
assert data_yaml, 'data.yaml no encontrado'
print(f"📄 data.yaml: {data_yaml}")

# An empty YAML file loads as None; fall back to an empty mapping so the
# lookups below do not raise.
with open(data_yaml, 'r', encoding='utf-8') as f:
    data_cfg = yaml.safe_load(f) or {}
root_yaml = os.path.dirname(data_yaml)
# Optional dataset root declared in the YAML. A relative value is interpreted
# against the YAML's own directory here (assumption for a self-contained
# archive — confirm against the dataset layout); an absolute value is used as is.
_dataset_root = os.path.normpath(os.path.join(root_yaml, str(data_cfg.get('path') or '')))


def _resolve_split(entry):
    """Return *entry* (a split path or list of paths) made absolute.

    The first existing candidate wins. If none exists, the result is the
    entry joined to the YAML directory, which is what this cell always
    produced before.
    """
    if isinstance(entry, (list, tuple)):
        return [_resolve_split(item) for item in entry]
    if not isinstance(entry, str) or os.path.isabs(entry):
        return entry
    # Exported YAMLs sometimes prefix splits with '../' although the folders
    # sit beside the YAML; also try the entry with that prefix removed.
    trimmed = entry[3:] if entry.startswith('../') else entry
    candidates = [
        os.path.normpath(os.path.join(base, rel))
        for rel in (entry, trimmed)
        for base in (root_yaml, _dataset_root)
    ]
    return next((c for c in candidates if os.path.exists(c)), candidates[0])


# Rewrite the split entries as absolute paths so the YAML copy written below
# stays valid even though it lives in a different directory.
for k in ['train', 'val', 'test']:
    if data_cfg.get(k):
        data_cfg[k] = _resolve_split(data_cfg[k])

FINAL_DATA_YAML = os.path.join(DATA_EXTRACT_DIR, 'data_final.yaml')
with open(FINAL_DATA_YAML, 'w', encoding='utf-8') as f:
    yaml.safe_dump(data_cfg, f)
print("🔍 Dataset Config:")
print(' nc:', data_cfg.get('nc'))
print(' names:', data_cfg.get('names'))

def count_images(p):
    """Count image files located directly inside directory *p*.

    Args:
        p: A directory path, or a list/tuple of directory paths whose counts
            are summed.

    Returns:
        Number of entries whose extension is .jpg, .jpeg or .png
        (case-insensitive). Returns 0 for an empty or ``None`` value, a
        missing path, or a path that is not a directory.
    """
    if isinstance(p, (list, tuple)):
        return sum(count_images(item) for item in p)
    # isdir (rather than exists) keeps os.listdir from raising when the
    # split entry points at a regular file.
    if not p or not os.path.isdir(p):
        return 0
    exts = {'.jpg', '.jpeg', '.png'}
    return sum(1 for f in os.listdir(p) if os.path.splitext(f)[1].lower() in exts)
# Report how many images each split directory holds.
for _split_key, _caption in (('train', ' 🖼 Train images:'), ('val', ' 🖼 Val images  :')):
    print(_caption, count_images(data_cfg.get(_split_key, '')))

In [ ]:
# =============================================
# 3. Fase 1 - Configuración y Entrenamiento
# =============================================
# The run name carries a timestamp so each execution gets its own directory.
_phase1_stamp = time.strftime('%Y%m%d_%H%M%S')
phase1_name = f"veh_parts_phase1_{MODEL_SIZE}_{_phase1_stamp}"
PHASE1_DIR = os.path.join(RESULTS_ROOT, phase1_name)
os.makedirs(PHASE1_DIR, exist_ok=True)

# Keyword arguments forwarded unchanged to model.train() below.
phase1_args = {
    # schedule and runtime
    'epochs': PHASE1_EPOCHS,
    'patience': PHASE1_PATIENCE,
    'imgsz': IMG_SIZE_PHASE1,
    'batch': BASE_BATCH,
    'workers': 4,
    'device': 0 if torch.cuda.is_available() else 'cpu',
    'amp': True,
    'cache': True,
    # optimizer
    'lr0': 0.005,
    'lrf': 0.01,
    'momentum': 0.937,
    'weight_decay': 0.0015,
    'warmup_epochs': 3,
    'label_smoothing': 0.10,
    # augmentation
    'hsv_h': 0.01,
    'hsv_s': 0.30,
    'hsv_v': 0.20,
    'degrees': 5,
    'translate': 0.05,
    'scale': 0.15,
    'shear': 1.0,
    'perspective': 0.0,
    'flipud': 0.0,
    'fliplr': 0.5,
    'mosaic': 0.30,
    'mixup': 0.20,
    'copy_paste': 0.25,
    'close_mosaic': 20,
    # loss gains
    'box': 7.5,
    'cls': 0.9,
    'dfl': 1.5,
    'optimizer': 'AdamW',
    # output
    'project': RESULTS_ROOT,
    'name': phase1_name,
    'save': True,
    'save_period': 10,
    'plots': True,
    'exist_ok': True,
    'verbose': True,
}
print("⚙️ Config Fase 1 (resumen):")
_phase1_summary_keys = ('epochs', 'batch', 'patience', 'mosaic', 'mixup', 'copy_paste', 'cls')
_phase1_summary = {key: phase1_args[key] for key in _phase1_summary_keys}
print(json.dumps(_phase1_summary, indent=2))

base_model = f"yolov8{MODEL_SIZE}.pt"
print(f"📥 Cargando modelo base: {base_model}")
model_phase1 = YOLO(base_model)
t0 = time.time()
print("🚀 Entrenando Fase 1...")
res_phase1 = model_phase1.train(data=FINAL_DATA_YAML, **phase1_args)
# Elapsed wall-clock time, in minutes.
t1 = (time.time() - t0) / 60
print(f"✅ Fase 1 completada en {t1:.1f} min")

In [ ]:
# =============================================
# 4. Monitoreo Opcional (ejecutar aparte si se desea)
# =============================================
import pandas as pd, time
def monitor(experiment_dir, interval=25, max_polls=None):
    """Poll a run's ``results.csv`` and print the newest row's metrics.

    Args:
        experiment_dir: Directory expected to contain ``results.csv``.
        interval: Seconds to sleep after each poll.
        max_polls: Number of polls to perform before returning. ``None``
            (the default) polls until interrupted, as before.

    Returns:
        The row count of ``results.csv`` at the last successful report, or 0
        when nothing was reported.
    """
    csv_path = os.path.join(experiment_dir, 'results.csv')
    last = 0
    polls = 0
    print(f"👁️ Monitoreando: {experiment_dir}")
    while max_polls is None or polls < max_polls:
        polls += 1
        if os.path.exists(csv_path):
            try:
                df = pd.read_csv(csv_path)
                # Header cells may carry padding whitespace (reported for
                # results.csv files from some Ultralytics releases — confirm
                # for the installed version); strip it so the bare column
                # names below resolve.
                df.columns = df.columns.str.strip()
                rows = len(df)
                if rows > 0 and rows != last:
                    newest = df.iloc[-1]
                    line = (
                        f"Época {rows} | mAP50={newest['metrics/mAP50(B)']:.3f}"
                        f" | P={newest['metrics/precision(B)']:.3f}"
                        f" | R={newest['metrics/recall(B)']:.3f}"
                    )
                    print(line)
                    # Record the row count only after a successful report so
                    # a failed read is retried on the next poll.
                    last = rows
            except Exception as e:
                # Best-effort monitor: the file may be mid-write, so report
                # the problem and keep polling.
                print('Lectura error:', e)
        else:
            print('Esperando results.csv...')
        time.sleep(interval)
    return last

# monitor(PHASE1_DIR)  # Descomentar si se quiere usar mientras entrena

In [ ]:
# =============================================
# 5. Evaluación Fase 1
# =============================================
best_phase1 = os.path.join(PHASE1_DIR, 'weights', 'best.pt')
assert os.path.exists(best_phase1), 'best.pt Fase1 no encontrado'
model_eval1 = YOLO(best_phase1)
print("📊 Evaluando Fase 1 (val)...")
val1 = model_eval1.val(data=FINAL_DATA_YAML, split='val')
# Pull the four headline scores from the result's results_dict mapping,
# defaulting to 0 when a key is missing.
_phase1_scores = val1.results_dict
_phase1_metric_keys = (
    ('mAP50', 'metrics/mAP50(B)'),
    ('mAP50_95', 'metrics/mAP50-95(B)'),
    ('precision', 'metrics/precision(B)'),
    ('recall', 'metrics/recall(B)'),
)
m1 = {short: float(_phase1_scores.get(full, 0)) for short, full in _phase1_metric_keys}
print(json.dumps(m1, indent=2))
_phase1_report = os.path.join(PHASE1_DIR, 'evaluation_phase1.json')
with open(_phase1_report, 'w') as f:
    json.dump(m1, f, indent=2)

In [ ]:
# =============================================
# 6. Fase 2 - Fine-Tune de Refinamiento
# =============================================
# Phase 2 reuses the phase 1 run name with a suffix, keeping the two run
# directories side by side under RESULTS_ROOT.
phase2_name = f"{phase1_name}_finetune"
PHASE2_DIR = os.path.join(RESULTS_ROOT, phase2_name)

# Keyword arguments forwarded unchanged to model.train() below.
phase2_args = {
    # schedule and runtime
    'epochs': PHASE2_EPOCHS,
    'patience': PHASE2_PATIENCE,
    'imgsz': IMG_SIZE_PHASE2,
    'batch': max(8, BASE_BATCH - 4),
    'workers': 4,
    'device': 0 if torch.cuda.is_available() else 'cpu',
    'amp': True,
    'cache': False,
    # optimizer
    'lr0': 0.003,
    'lrf': 0.01,
    'momentum': 0.937,
    'weight_decay': 0.0008,
    'warmup_epochs': 2,
    'label_smoothing': 0.05,
    # augmentation
    'hsv_h': 0.005,
    'hsv_s': 0.20,
    'hsv_v': 0.15,
    'degrees': 3,
    'translate': 0.03,
    'scale': 0.12,
    'shear': 0.5,
    'perspective': 0.0,
    'flipud': 0.0,
    'fliplr': 0.5,
    'mosaic': 0.0,
    'mixup': 0.05,
    'copy_paste': 0.05,
    'close_mosaic': 0,
    # loss gains
    'box': 7.5,
    'cls': 1.0,
    'dfl': 1.5,
    'optimizer': 'AdamW',
    # output
    'project': RESULTS_ROOT,
    'name': phase2_name,
    'exist_ok': True,
    'save': True,
    'save_period': 5,
    'plots': True,
    'verbose': True,
}
print("⚙️ Config Fase 2 (resumen):")
_phase2_summary_keys = ('epochs', 'imgsz', 'batch', 'lr0', 'mosaic', 'mixup', 'cls')
_phase2_summary = {key: phase2_args[key] for key in _phase2_summary_keys}
print(json.dumps(_phase2_summary, indent=2))

# Fine-tuning starts from the best phase 1 checkpoint.
print(f"📥 Cargando pesos Fase1: {best_phase1}")
model_phase2 = YOLO(best_phase1)
t0 = time.time()
print("🚀 Entrenando Fase 2...")
res_phase2 = model_phase2.train(data=FINAL_DATA_YAML, **phase2_args)
# Elapsed wall-clock time, in minutes.
t2 = (time.time() - t0) / 60
print(f"✅ Fase 2 completada en {t2:.1f} min")

In [ ]:
# =============================================
# 7. Evaluación Fase 2
# =============================================
best_phase2 = os.path.join(PHASE2_DIR, 'weights', 'best.pt')
assert os.path.exists(best_phase2), 'best.pt Fase2 no encontrado'
model_eval2 = YOLO(best_phase2)
print("📊 Evaluando Fase 2 (val)...")
val2 = model_eval2.val(data=FINAL_DATA_YAML, split='val')
# Pull the four headline scores from the result's results_dict mapping,
# defaulting to 0 when a key is missing.
_phase2_scores = val2.results_dict
_phase2_metric_keys = (
    ('mAP50', 'metrics/mAP50(B)'),
    ('mAP50_95', 'metrics/mAP50-95(B)'),
    ('precision', 'metrics/precision(B)'),
    ('recall', 'metrics/recall(B)'),
)
m2 = {short: float(_phase2_scores.get(full, 0)) for short, full in _phase2_metric_keys}
print(json.dumps(m2, indent=2))
_phase2_report = os.path.join(PHASE2_DIR, 'evaluation_phase2.json')
with open(_phase2_report, 'w') as f:
    json.dump(m2, f, indent=2)

# Consolidated report: both phases side by side in one file.
consolidated = {'phase1': m1, 'phase2': m2}
_consolidated_report = os.path.join(PHASE2_DIR, 'evaluation_consolidated.json')
with open(_consolidated_report, 'w') as f:
    json.dump(consolidated, f, indent=2)
print("💾 evaluation_consolidated.json creado")

In [ ]:
# =============================================
# 8. Visualizaciones Comparativas
# =============================================
def load_results_csv(d):
    """Load ``results.csv`` from directory *d*.

    Header cells are stripped of surrounding whitespace so columns can be
    addressed by their bare names (e.g. ``'metrics/mAP50(B)'``) even when the
    file was written with padded headers.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist.
    """
    p = os.path.join(d, 'results.csv')
    if not os.path.exists(p):
        return None
    df = pd.read_csv(p)
    df.columns = df.columns.str.strip()
    return df

df1 = load_results_csv(PHASE1_DIR)
df2 = load_results_csv(PHASE2_DIR)
assert df1 is not None and df2 is not None, 'results.csv faltante'

viz_dir = os.path.join(PHASE2_DIR, 'visualizations')
os.makedirs(viz_dir, exist_ok=True)
sns.set_style('whitegrid')

# --- Figure 1: metric curves, with phase 2 appended after phase 1 on a
# shared "global epoch" axis.
_phase2_epochs = range(len(df1), len(df1) + len(df2))
_curve_specs = (
    # (subplot index, CSV column, short label, title, phase 1 colour, phase 2 colour)
    (1, 'metrics/mAP50(B)', 'mAP50', 'Evolución mAP@0.5', 'royalblue', 'navy'),
    (2, 'metrics/precision(B)', 'Precision', 'Evolución Precision', 'green', 'darkgreen'),
)
plt.figure(figsize=(16, 5))
for _pos, _column, _short, _title, _shade1, _shade2 in _curve_specs:
    plt.subplot(1, 2, _pos)
    plt.plot(df1[_column], label=f'Phase1 {_short}', c=_shade1)
    plt.plot(_phase2_epochs, df2[_column], label=f'Phase2 {_short}', c=_shade2)
    plt.title(_title)
    plt.xlabel('Época global')
    plt.ylabel(_short)
    plt.legend()
p_curve = os.path.join(viz_dir, 'phase_comparison_curves.png')
plt.tight_layout()
plt.savefig(p_curve, dpi=220)
plt.show()

# --- Figure 2: grouped bars comparing both phases with the 0.75 targets.
labels = ['mAP50', 'Precision', 'Recall']
_metric_order = ('mAP50', 'precision', 'recall')
phase1_vals = [m1[key] for key in _metric_order]
phase2_vals = [m2[key] for key in _metric_order]
targets = [0.75, 0.75, 0.75]
x = np.arange(len(labels))
w = 0.25
plt.figure(figsize=(10, 5))
_bar_groups = (
    (-w, phase1_vals, 'Phase1', '#87b5ff'),
    (0, phase2_vals, 'Phase2', '#7ae29a'),
    (w, targets, 'Objetivo', '#f2d46b'),
)
for _offset, _heights, _tag, _shade in _bar_groups:
    plt.bar(x + _offset, _heights, w, label=_tag, color=_shade)
# Only the phase 2 bars are annotated with their numeric value.
for _idx, _score in enumerate(phase2_vals):
    plt.text(_idx, _score + 0.01, f"{_score:.3f}", ha='center')
plt.xticks(x, labels)
plt.ylabel('Score')
plt.title('Comparación Fase1 vs Fase2 vs Objetivo')
plt.ylim(0, max(1.0, max(phase2_vals) + 0.1))
plt.legend()
p_bar = os.path.join(viz_dir, 'phase_comparison_bars.png')
plt.savefig(p_bar, dpi=220, bbox_inches='tight')
plt.show()

# Index file recording both image paths and both metric sets.
_viz_index = {'curves': p_curve, 'bars': p_bar, 'phase1': m1, 'phase2': m2}
with open(os.path.join(viz_dir, 'visualizations_index.json'), 'w') as f:
    json.dump(_viz_index, f, indent=2)
print('✅ Visualizaciones guardadas')

In [ ]:
# =============================================
# 9. Exportaciones (para despliegue)
# =============================================
import shutil

print('📦 Exportando modelo final (ONNX, TorchScript)...')
# Prefer the phase 2 checkpoint; fall back to phase 1 if it is missing.
final_model = YOLO(best_phase2 if os.path.exists(best_phase2) else best_phase1)
EXPORT_DIR = os.path.join(PHASE2_DIR, 'exports')
os.makedirs(EXPORT_DIR, exist_ok=True)
# The working directory is deliberately left untouched: changing it breaks
# the relative paths used elsewhere in this notebook ('./dataset_extracted'
# outside Colab). NOTE(review): Ultralytics appears to write each exported
# file next to the source weights, not into the cwd — confirm for the
# installed version. The returned path is therefore copied into EXPORT_DIR.
_export_jobs = (
    {'format': 'onnx', 'opset': 12, 'simplify': True},
    {'format': 'torchscript'},
)
for _job in _export_jobs:
    _exported = final_model.export(**_job)
    # Copy only when export() returned a path to an existing file.
    if isinstance(_exported, (str, os.PathLike)) and os.path.isfile(_exported):
        shutil.copy2(_exported, EXPORT_DIR)
print('✅ Export completado: ONNX y TorchScript')

## ✅ Finalización
**Directorios clave:**  
- Fase 1: `PHASE1_DIR`  
- Fase 2: `PHASE2_DIR`  
Archivos relevantes: best.pt (fase2), evaluation_phase2.json, visualizations, exports.

**Siguiente:** Integrar best.pt en pipeline de inferencia y documentar mejoras fase2.