# 🫀 1D CNN Autoencoder — RTX 5080 Ready

Este notebook reconstruye el flujo completo para entrenar y evaluar un **Autoencoder 1D CNN** sobre los datos preparados (PTB-XL + MIMIC normalizados, derivaciones II/V1/V5 a 500 Hz). Está pensado para funcionar sin sobresaltos en Windows con una **RTX 5080 (Blackwell)**:

- Celda única de instalación que se asegura de tener la build correcta de **PyTorch nightly cu128** y añade las rutas DLL indispensables.
- Configuración centralizada y autodocumentada (JSON opcional) para hiperparámetros, rutas, MLflow y umbrales.
- Pipeline con **Prefect** + **MLflow** minimalista pero robusto: entrenamiento, logging, artefactos y evaluación.
- Fallback automático a CPU si CUDA no queda disponible tras la instalación.

> ▶️ Recomendación: ejecuta cada celda en orden. Si instalas PyTorch, reinicia el kernel antes de continuar.


## 🧭 Guía rápida

1. **Instala dependencias** (celda "Setup RTX 5080"); reinicia el kernel si se instaló PyTorch.
2. **Configura** rutas y parámetros en la celda "Config" (o crea/edita `../config/ae1d_config.json`).
3. **Carga datos** con la celda de DataLoaders apuntando al dataset combinado PTB-XL + MIMIC (`../data/combined_ptbxl_mimic_500hz_iv1v5`).
4. **Define y revisa** el modelo Autoencoder.
5. **Entrena** con Prefect + MLflow (celda de entrenamiento).
6. **Evalúa** métricas de reconstrucción y detección de anomalías.

Durante todo el flujo se guardan artefactos y parámetros en `../mlflow.db` y `../mlflow_artifacts/`.

---

## 🔧 Configuración Rápida

### 🎯 Cambiar Umbral de Anomalías
**Celda 10** - Búsqueda de umbral óptimo:
```python
# Percentiles más altos = menos falsos positivos
percentiles=[80, 85, 90, 92, 94, 96, 98, 99]

# FPR máximo permitido (5% = 0.05)
max_fpr=0.05
```

### ⚙️ Cambiar Pesos/Hiperparámetros del Modelo
**Celda 2** - Configuración:
```python
# En el archivo JSON o en la celda:
{
  "model": {
    "base_filters": 32,      # Más = modelo más grande
    "kernels": [11, 7, 9, 11], # Tamaños de kernel
    "leak": 0.1              # Pendiente LeakyReLU
  },
  "training": {
    "epochs": 30,            # Número de épocas
    "batch_size": 64,        # Tamaño de batch
    "lr": 0.0003,            # Learning rate
    "weight_decay": 1e-5     # Regularización L2
  }
}
```


In [1]:
# ========================================
# 🔧 Setup RTX 5080 — dependencias + CUDA DLL
# Ejecuta una sola vez (o tras actualizar drivers/librerías)
# ========================================
import os
import sys
import subprocess
from pathlib import Path
from textwrap import dedent

print(f"Python: {sys.executable}")
print(f"Working dir: {Path.cwd().resolve()}")

# Candidate CUDA / cuDNN locations. Unset environment variables and
# directories that do not exist on disk are skipped by the loop below.
CUDA_CANDIDATES = [
    os.environ.get("CUDA_PATH"),
    os.environ.get("CUDA_PATH_V12_8"),
    r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin",
    r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\libnvvp",
    r"C:\Program Files\NVIDIA\CUDNN",
]

# Directories actually registered with the Windows DLL loader, in order.
added = []
if hasattr(os, "add_dll_directory"):  # the API only exists on Windows
    _seen_dll_dirs = set()
    for candidate in CUDA_CANDIDATES:
        if not candidate:
            continue
        root = Path(candidate)
        # CUDA_PATH-style variables point at the toolkit root, so the `bin`
        # child is tried as well; it is skipped when it does not exist.
        for path in (root, root / "bin"):
            # Case/separator-insensitive key so the same directory reached
            # through two candidates is registered only once.
            key = os.path.normcase(os.path.normpath(str(path)))
            if key in _seen_dll_dirs or not path.is_dir():
                continue
            _seen_dll_dirs.add(key)
            try:
                os.add_dll_directory(str(path))
            except OSError:
                # Best effort: an unusable directory must not abort setup.
                continue
            added.append(str(path))
if added:
    print("DLL directories a√±adidos:")
    for path in added:
        print("  -", path)

# Pip requirement specifiers for the non-PyTorch dependencies of the notebook;
# each entry is handed to pip_install() by the loop further down.
BASE_PACKAGES = [
    "mlflow>=2.16",
    "prefect>=3",
    "scikit-learn",
    "matplotlib",
    "pandas",
    "numpy",
]

def pip_install(spec: str) -> None:
    module_name = spec.split("==")[0].split("[")[0].replace("-", "_")
    try:
        __import__(module_name)
        print(f"‚úî {spec} ya instalado")
    except Exception:
        print(f"‚è≥ Instalando {spec} ...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", spec])

# Make sure every base requirement is present before dealing with PyTorch.
for requirement in BASE_PACKAGES:
    pip_install(requirement)

# pip command line that installs/upgrades the pre-release PyTorch stack from
# the cu128 nightly wheel index.
_PIP_INSTALL = [sys.executable, "-m", "pip", "install"]
_TORCH_FLAGS = ["--upgrade", "--pre"]
_TORCH_DISTRIBUTIONS = ["torch", "torchvision", "torchaudio"]
_TORCH_INDEX = ["--index-url", "https://download.pytorch.org/whl/nightly/cu128"]
TORCH_INSTALL_CMD = [*_PIP_INSTALL, *_TORCH_FLAGS, *_TORCH_DISTRIBUTIONS, *_TORCH_INDEX]


def ensure_torch_cuda() -> "tuple[object | None, dict]":
    """Import torch, installing the nightly cu128 build if the import is unusable.

    The first attempt imports torch and rejects it unless its version string
    contains ``cu128`` or its CUDA version starts with ``12.8``. Any failure
    (import error or rejected build) triggers ``TORCH_INSTALL_CMD`` followed
    by a second import.

    Returns:
        ``(torch_module, info)`` where ``info`` holds ``torch_version``,
        ``cuda_version`` and ``cuda_available``.

    Raises:
        subprocess.CalledProcessError: If the pip installation fails.
    """
    info: dict[str, str | float | bool] = {}

    def _record(torch_module) -> None:
        # Fill `info` in place; absent attributes fall back to "desconocida".
        info["torch_version"] = getattr(torch_module, "__version__", "desconocida")
        version_ns = getattr(torch_module, "version", object())
        info["cuda_version"] = getattr(version_ns, "cuda", "desconocida")
        info["cuda_available"] = bool(torch_module.cuda.is_available())

    try:
        import torch  # type: ignore

        _record(torch)
        looks_like_cu128 = (
            "cu128" in info["torch_version"]
            or str(info["cuda_version"]).startswith("12.8")
        )
        if not looks_like_cu128:
            raise RuntimeError(
                f"Build {info['torch_version']} no es cu128. Se reinstalar√° la nightly para RTX 5080."
            )
        return torch, info
    except Exception as err:
        print("‚ö†Ô∏è Torch no usable todav√≠a:", err)
        print("   Instalando nightly cu128 desde PyTorch (puede tardar).")
        subprocess.check_call(TORCH_INSTALL_CMD)

        import importlib
        import time

        # Short pause, then drop import-system caches so the freshly
        # installed files are visible to the import below.
        time.sleep(2)
        importlib.invalidate_caches()

        import torch  # type: ignore

        _record(torch)
        return torch, info


torch, torch_info = ensure_torch_cuda()

# Echo what was detected about the torch build.
print("Torch info:")
for key, value in torch_info.items():
    print(f"  - {key}: {value}")

if not torch_info.get("cuda_available"):
    # CUDA is not usable: print the manual recovery instructions.
    print(dedent(
        """
        ‚ö†Ô∏è CUDA sigue inactiva. Revisa drivers / reinicia kernel tras la instalaci√≥n.
        Si el problema contin√∫a, ejecuta manualmente:
          pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
        """
    ))
else:
    try:
        gpu_name = torch.cuda.get_device_name(0)
        cc = torch.cuda.get_device_properties(0)
    except Exception as e:
        print("‚ö†Ô∏è CUDA disponible pero no se pudo consultar GPU:", e)
    else:
        print(f"GPU detectada: {gpu_name} | SM {cc.major}{cc.minor}")



Python: c:\Python311\python.exe
Working dir: S:\Proyecto final\Books
DLL directories a√±adidos:
  - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8
  - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8
  - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin
  - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\libnvvp
‚è≥ Instalando mlflow>=2.16 ...
‚è≥ Instalando prefect>=3 ...
‚è≥ Instalando scikit-learn ...
‚úî matplotlib ya instalado
‚úî pandas ya instalado
‚úî numpy ya instalado
Torch info:
  - torch_version: 2.10.0.dev20251113+cu128
  - cuda_version: 12.8
  - cuda_available: True
GPU detectada: NVIDIA GeForce RTX 5080 | SM 120


In [25]:
# ========================================
# ⚙️ Config — hiperparámetros centralizados
# ========================================
from __future__ import annotations

import json
import random
from dataclasses import dataclass, asdict, field
from pprint import pprint

import numpy as np
import torch

# Optional JSON file holding user overrides for the notebook configuration.
CONFIG_JSON = Path("../config/ae1d_config.json")
# Default dataset directory, resolved to an absolute path when this cell runs.
COMBINED_DEFAULT = Path("../data/combined_ptbxl_mimic_500hz_iv1v5").resolve()


@dataclass
class PathsConfig:
    """Filesystem locations used by the notebook."""

    # Directory that holds the prepared dataset files; defaults to COMBINED_DEFAULT.
    output_root: str = str(COMBINED_DEFAULT)


@dataclass
class TrainingConfig:
    """Training hyper-parameters.

    NOTE(review): only ``seed``, ``device`` and ``batch_size`` are consumed by
    the cells visible here; the rest is presumably read by the training cell.
    """

    seed: int = 42
    device: str = "auto"  # "auto" -> cuda if available, otherwise cpu
    epochs: int = 30
    batch_size: int = 64
    lr: float = 3e-4
    weight_decay: float = 1e-5
    clip_grad: float | None = None
    loss_fn: str = "mse"


@dataclass
class ModelConfig:
    """Architecture parameters read by the autoencoder constructor."""

    # Number of input channels; checked against the dataset's C dimension.
    in_channels: int = 3
    base_filters: int = 32
    # Negative slope passed to LeakyReLU.
    leak: float = 0.1
    # Four convolution kernel sizes, unpacked as k1..k4 by the model.
    kernels: tuple[int, int, int, int] = (11, 7, 9, 11)


@dataclass
class ThresholdConfig:
    """Anomaly-threshold settings.

    NOTE(review): not read by any cell visible here; presumably either a
    mean + k_std rule or a percentile rule is applied downstream - confirm.
    """

    use_mean_std: bool = False
    k_std: float = 3.0
    percentile: float = 98.0


@dataclass
class MlflowConfig:
    """MLflow experiment settings."""

    experiment_name: str = "ae1d"
    # Meant to be assigned at runtime to the sqlite ../mlflow.db.
    # NOTE(review): the visible MLflow cell sets the tracking URI directly and
    # does not read this field - confirm whether it is used elsewhere.
    tracking_uri: str | None = None


@dataclass
class NotebookConfig:
    """Top-level configuration grouping the five per-topic sections."""

    paths: PathsConfig = field(default_factory=PathsConfig)
    training: TrainingConfig = field(default_factory=TrainingConfig)
    model: ModelConfig = field(default_factory=ModelConfig)
    threshold: ThresholdConfig = field(default_factory=ThresholdConfig)
    mlflow: MlflowConfig = field(default_factory=MlflowConfig)

    def to_dict(self) -> dict:
        """Return a ``{section name: section fields as dict}`` mapping."""
        section_names = ("paths", "training", "model", "threshold", "mlflow")
        return {name: asdict(getattr(self, name)) for name in section_names}

    @classmethod
    def from_dict(cls, data: dict) -> "NotebookConfig":
        """Build a config from a ``{section name: field dict}`` mapping.

        Sections missing from *data* fall back to that section's defaults.
        Unknown field names inside a section raise ``TypeError`` from the
        section's constructor.
        """
        section_types = {
            "paths": PathsConfig,
            "training": TrainingConfig,
            "model": ModelConfig,
            "threshold": ThresholdConfig,
            "mlflow": MlflowConfig,
        }
        sections = {
            name: section_type(**data.get(name, {}))
            for name, section_type in section_types.items()
        }
        return cls(**sections)


DEFAULT_CONFIG = NotebookConfig()

# Start from the defaults; replaced below when a readable JSON file exists.
user_cfg: NotebookConfig = DEFAULT_CONFIG
if not CONFIG_JSON.exists():
    # First run: write a template the user can edit later.
    CONFIG_JSON.parent.mkdir(parents=True, exist_ok=True)
    with open(CONFIG_JSON, "w", encoding="utf-8") as f:
        json.dump(DEFAULT_CONFIG.to_dict(), f, indent=2, ensure_ascii=False)
    print(f"Plantilla de configuraci√≥n creada en {CONFIG_JSON}")
else:
    try:
        with open(CONFIG_JSON, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Top-level sections from the file override the default sections.
        merged_sections = {**DEFAULT_CONFIG.to_dict(), **data}
        user_cfg = NotebookConfig.from_dict(merged_sections)
        print(f"Usando configuraci√≥n cargada desde {CONFIG_JSON}")
    except Exception as err:
        # Any read/parse/validation problem falls back to the defaults.
        print(f"‚ö†Ô∏è No se pudo leer {CONFIG_JSON}: {err}. Se usar√° la configuraci√≥n por defecto.")

CONFIG = user_cfg

# A configured dataset directory that does not exist is replaced by the default.
_configured_root = Path(CONFIG.paths.output_root)
if not _configured_root.exists():
    print(
        f"‚ö†Ô∏è Ruta {CONFIG.paths.output_root} no existe. Se usar√° el dataset combinado por defecto: {COMBINED_DEFAULT}"
    )
    CONFIG.paths.output_root = str(COMBINED_DEFAULT)

# --- helpers ---
def resolve_device(device_setting: str) -> str:
    """Translate a device setting into a lower-cased device string.

    ``"auto"`` (any casing) becomes ``"cuda"`` when CUDA is available and
    ``"cpu"`` otherwise; every other value is returned lower-cased.
    """
    normalized = device_setting.lower()
    if normalized != "auto":
        return normalized
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


def set_seed_everywhere(seed: int) -> None:
    """Seed Python's ``random``, NumPy and torch (plus all CUDA devices if available)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.manual_seed_all(seed)


# Resolve runtime settings from the active configuration.
TRAINING = CONFIG.training
DEVICE = resolve_device(TRAINING.device)
set_seed_everywhere(TRAINING.seed)

# Dataset directory. NOTE(review): it is created when missing, so a wrong
# path only surfaces later, when the data files themselves are not found.
OUTPUT_ROOT = Path(CONFIG.paths.output_root).resolve()
OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
print(f"Dataset utilizado: {OUTPUT_ROOT}")

# Data files expected inside the dataset directory.
X_NORM_MM_DAT = OUTPUT_ROOT.joinpath("X_norm_mm.dat")
X_NORM_RAW_DAT = OUTPUT_ROOT.joinpath("X_norm_raw.dat")
X_NORM_FILT_DAT = OUTPUT_ROOT.joinpath("X_norm_filt.dat")
X_ANOM_MM_NPY = OUTPUT_ROOT.joinpath("X_anom_mm.npy")
DIMS_JSON = OUTPUT_ROOT.joinpath("dims.json")

# Index files for each split, stored in the "splits" sub-directory.
SPLITS_DIR = OUTPUT_ROOT.joinpath("splits")
IDX_NORM_TRAIN = SPLITS_DIR.joinpath("idx_norm_train.npy")
IDX_NORM_VAL = SPLITS_DIR.joinpath("idx_norm_val.npy")
IDX_NORM_TEST = SPLITS_DIR.joinpath("idx_norm_test.npy")
IDX_ANOM_VAL = SPLITS_DIR.joinpath("idx_anom_val.npy")
IDX_ANOM_TEST = SPLITS_DIR.joinpath("idx_anom_test.npy")

print("Configuraci√≥n activa:")
pprint(CONFIG.to_dict())
print(f"Dispositivo seleccionado: {DEVICE}")



Usando configuraci√≥n cargada desde ..\config\ae1d_config.json
Dataset utilizado: S:\Proyecto final\data\combined_ptbxl_mimic_500hz_iv1v5
Configuraci√≥n activa:
{'mlflow': {'experiment_name': 'ptbxl_ae_1dcnn_iv1v5', 'tracking_uri': None},
 'model': {'base_filters': 32,
           'in_channels': 3,
           'kernels': [11, 7, 9, 11],
           'leak': 0.1},
 'paths': {'output_root': 'S:\\Proyecto '
                          'final\\data\\combined_ptbxl_mimic_500hz_iv1v5'},
 'threshold': {'k_std': 3.0, 'percentile': 98.0, 'use_mean_std': False},
 'training': {'batch_size': 64,
              'clip_grad': None,
              'device': 'auto',
              'epochs': 25,
              'loss_fn': 'mse',
              'lr': 0.0003,
              'seed': 42,
              'weight_decay': 1e-05}}
Dispositivo seleccionado: cuda


In [26]:
# ========================================
# 🔧 CORRECCIÓN: Limpiar NaN/Inf en los datasets
# ========================================
# Redefinir las clases Dataset para limpiar automáticamente valores no finitos

def _sanitize_non_finite(sample_tensor: torch.Tensor) -> torch.Tensor:
    """Replace NaN, +Inf and -Inf with 0 and bound the result to [-1e6, 1e6].

    A tensor that is already fully finite is returned unchanged; in
    particular it is NOT clamped, which matches the previous behaviour.
    NOTE(review): large finite values therefore pass through untouched -
    confirm that this is intended.
    """
    if torch.isfinite(sample_tensor).all():
        return sample_tensor
    zeroed = torch.nan_to_num(sample_tensor, nan=0.0, posinf=0.0, neginf=0.0)
    return torch.clamp(zeroed, min=-1e6, max=1e6)


def _load_clean_sample(source, row) -> torch.Tensor:
    """Read ``source[row]`` ([T, C]), return it as a sanitized float32 [C, T] tensor."""
    # The copy detaches the transposed view from the backing array.
    channels_first = np.transpose(source[row], (1, 0)).copy()
    return _sanitize_non_finite(torch.from_numpy(channels_first).float())


class MemmapDatasetClean(Dataset):
    """Indexable view over a memmap that replaces NaN/Inf values on access."""

    def __init__(self, x_memmap: np.memmap, indices: np.ndarray):
        self.x = x_memmap
        self.indices = np.array(indices, dtype=np.int64)

    def __len__(self) -> int:
        return len(self.indices)

    def __getitem__(self, idx: int) -> torch.Tensor:
        return _load_clean_sample(self.x, self.indices[idx])


class NpyDatasetClean(Dataset):
    """Indexable view over an in-memory array that replaces NaN/Inf values on access."""

    def __init__(self, arr: np.ndarray, indices: np.ndarray):
        self.arr = arr
        self.indices = np.array(indices, dtype=np.int64)

    def __len__(self) -> int:
        return len(self.indices)

    def __getitem__(self, idx: int) -> torch.Tensor:
        return _load_clean_sample(self.arr, self.indices[idx])


# Rebuild the datasets on top of the sanitizing dataset classes.
print("Recreando datasets con limpieza autom√°tica de NaN/Inf...")
ds_train, ds_val_n, ds_test_n = (
    MemmapDatasetClean(X_norm_mm, split_indices)
    for split_indices in (idx_norm_train, idx_norm_val, idx_norm_test)
)

# Anomaly datasets exist only when there are both indices and anomaly samples.
ds_val_a = None
if len(idx_anom_val) > 0 and X_anom_mm.shape[0] > 0:
    ds_val_a = NpyDatasetClean(X_anom_mm, idx_anom_val)

ds_test_a = None
if len(idx_anom_test) > 0 and X_anom_mm.shape[0] > 0:
    ds_test_a = NpyDatasetClean(X_anom_mm, idx_anom_test)

# Rebuild the loaders; only the training loader shuffles.
dl_train = make_dataloader(ds_train, BATCH_SIZE, shuffle=True)
dl_val_n = make_dataloader(ds_val_n, BATCH_SIZE, shuffle=False)
dl_test_n = make_dataloader(ds_test_n, BATCH_SIZE, shuffle=False)
dl_val_a = None if ds_val_a is None else make_dataloader(ds_val_a, BATCH_SIZE, shuffle=False)
dl_test_a = None if ds_test_a is None else make_dataloader(ds_test_a, BATCH_SIZE, shuffle=False)

print("‚úì Datasets recreados con limpieza autom√°tica de NaN/Inf")
print(f"  Train: {len(ds_train)}, Val: {len(ds_val_n)}, Test: {len(ds_test_n)}")


Recreando datasets con limpieza autom√°tica de NaN/Inf...
‚úì Datasets recreados con limpieza autom√°tica de NaN/Inf
  Train: 121284, Val: 15160, Test: 15162


In [14]:
# ========================================
# 📂 Datos — memmap combinado (PTB-XL + MIMIC)
# ========================================
from typing import Tuple

import pandas as pd
from torch.utils.data import Dataset, DataLoader


def open_memmap_known_tc(path: Path, T: int, C: int, mode: str = "r", dtype=np.float32):
    """Open a raw binary file as an ``(N, T, C)`` memmap, inferring N from the file size.

    Args:
        path: Location of the raw data file.
        T: Size of the second axis of each record.
        C: Size of the third axis of each record.
        mode: Mode forwarded to ``np.memmap``.
        dtype: Element type stored in the file; its item size is used for
            the size checks (previously 4 bytes were assumed for any dtype).

    Returns:
        ``np.memmap`` of shape ``(N, T, C)``.

    Raises:
        FileNotFoundError: If *path* does not exist.
        ValueError: If *T* or *C* is not positive.
        RuntimeError: If the file size is not a whole number of elements or
            of ``T * C`` records.
    """
    if not path.exists():
        raise FileNotFoundError(f"No se encontr√≥ {path}. Verifica la extracci√≥n previa.")
    if T <= 0 or C <= 0:
        raise ValueError(f"T y C deben ser positivos (T={T}, C={C})")

    np_dtype = np.dtype(dtype)
    itemsize = np_dtype.itemsize
    bytes_total = os.path.getsize(path)
    if bytes_total % itemsize != 0:
        raise RuntimeError(f"{path} no es m√∫ltiplo de {itemsize} bytes ({np_dtype.name})")

    n_items = bytes_total // itemsize
    record_items = T * C
    if n_items % record_items != 0:
        raise RuntimeError(f"Tama√±o inconsistente con T={T}, C={C}")

    N = n_items // record_items
    return np.memmap(path, dtype=dtype, mode=mode, shape=(N, T, C))


# Record dimensions come from dims.json, or from the legacy dims.npy file.
_dims_npy = OUTPUT_ROOT / "dims.npy"
if DIMS_JSON.exists():
    with open(DIMS_JSON, "r", encoding="utf-8") as f:
        dims = json.load(f)
elif _dims_npy.exists():
    arr = np.load(_dims_npy)
    dims = {"T": int(arr[0]), "C": int(arr[1])}
else:
    raise FileNotFoundError(
        f"No se encontraron dims.json ni dims.npy en {OUTPUT_ROOT}. Ejecuta la preparaci√≥n de datos combinados."
    )

T, C = int(dims["T"]), int(dims["C"])
# The dataset's channel count must match what the model is configured for.
assert C == CONFIG.model.in_channels, f"Esperaba {CONFIG.model.in_channels} derivaciones, pero C={C}"


class MemmapDataset(Dataset):
    """Dataset that serves selected rows of a memmap as float32 ``[C, T]`` tensors."""

    def __init__(self, x_memmap: np.memmap, indices: np.ndarray):
        # Rows of `x_memmap` are stored as [T, C]; `indices` selects which
        # rows this dataset exposes.
        self.x = x_memmap
        self.indices = np.array(indices, dtype=np.int64)

    def __len__(self) -> int:
        return len(self.indices)

    def __getitem__(self, idx: int) -> torch.Tensor:
        row = self.indices[idx]
        # Swap to channels-first and copy so the tensor owns its memory.
        channels_first = np.transpose(self.x[row], (1, 0)).copy()
        tensor = torch.from_numpy(channels_first)
        return tensor.float()


# Normal recordings are memory-mapped; anomalies are loaded fully in memory.
X_norm_mm = open_memmap_known_tc(X_NORM_MM_DAT, T, C)
if X_ANOM_MM_NPY.exists():
    X_anom_mm = np.load(X_ANOM_MM_NPY)
else:
    X_anom_mm = np.zeros((0, T, C), dtype=np.float32)

# Split indices for the normal recordings are mandatory.
idx_norm_train = np.load(IDX_NORM_TRAIN)
idx_norm_val = np.load(IDX_NORM_VAL)
idx_norm_test = np.load(IDX_NORM_TEST)

# Anomaly split indices are optional; a missing file yields an empty index array.
if IDX_ANOM_VAL.exists():
    idx_anom_val = np.load(IDX_ANOM_VAL)
else:
    idx_anom_val = np.zeros((0,), dtype=np.int64)
if IDX_ANOM_TEST.exists():
    idx_anom_test = np.load(IDX_ANOM_TEST)
else:
    idx_anom_test = np.zeros((0,), dtype=np.int64)


def make_dataloader(dataset: Dataset, batch_size: int, shuffle: bool) -> DataLoader:
    """Wrap *dataset* in a single-process DataLoader that keeps the last partial batch."""
    loader_options = {
        "batch_size": batch_size,
        "shuffle": shuffle,
        "num_workers": 0,
        "drop_last": False,
    }
    return DataLoader(dataset, **loader_options)


# One dataset per split of the normal recordings, all backed by the same memmap.
ds_train, ds_val_n, ds_test_n = (
    MemmapDataset(X_norm_mm, split_indices)
    for split_indices in (idx_norm_train, idx_norm_val, idx_norm_test)
)


class NpyDataset(Dataset):
    """Dataset that serves selected rows of an in-memory array as float32 ``[C, T]`` tensors."""

    def __init__(self, arr: np.ndarray, indices: np.ndarray):
        # Rows of `arr` are stored as [T, C]; `indices` selects the exposed rows.
        self.arr = arr
        self.indices = np.array(indices, dtype=np.int64)

    def __len__(self) -> int:
        return len(self.indices)

    def __getitem__(self, idx: int) -> torch.Tensor:
        row = self.indices[idx]
        # Swap to channels-first and copy so the tensor owns its memory.
        channels_first = np.transpose(self.arr[row], (1, 0)).copy()
        tensor = torch.from_numpy(channels_first)
        return tensor.float()


# Anomaly datasets need both split indices and anomaly samples. Without the
# second check, a missing X_anom_mm file (empty array) combined with existing
# index files would build datasets that fail on first access.
_has_anomaly_samples = X_anom_mm.shape[0] > 0
ds_val_a = NpyDataset(X_anom_mm, idx_anom_val) if len(idx_anom_val) and _has_anomaly_samples else None
ds_test_a = NpyDataset(X_anom_mm, idx_anom_test) if len(idx_anom_test) and _has_anomaly_samples else None

BATCH_SIZE = TRAINING.batch_size

# Only the training loader shuffles; evaluation loaders keep a stable order.
dl_train = make_dataloader(ds_train, BATCH_SIZE, shuffle=True)
dl_val_n = make_dataloader(ds_val_n, BATCH_SIZE, shuffle=False)
dl_test_n = make_dataloader(ds_test_n, BATCH_SIZE, shuffle=False)
dl_val_a = make_dataloader(ds_val_a, BATCH_SIZE, shuffle=False) if ds_val_a is not None else None
dl_test_a = make_dataloader(ds_test_a, BATCH_SIZE, shuffle=False) if ds_test_a is not None else None

print("Samples disponibles:")
print(f"  Train normales: {len(ds_train)}")
print(f"  Val normales: {len(ds_val_n)} | Val an√≥malos: {0 if ds_val_a is None else len(ds_val_a)}")
print(f"  Test normales: {len(ds_test_n)} | Test an√≥malos: {0 if ds_test_a is None else len(ds_test_a)}")



Samples disponibles:
  Train normales: 121284
  Val normales: 15160 | Val an√≥malos: 100
  Test normales: 15162 | Test an√≥malos: 900


In [15]:
# ========================================
# 🔧 CORRECCIÓN: Recargar anómalos correctamente
# ========================================
# IMPORTANTE: Ejecuta primero la celda 3 (carga de datos) antes de esta celda

import json
import numpy as np

_banner = "=" * 60
print(_banner)
print("CORRECCI√ìN: Recargando an√≥malos")
print(_banner)

# The path constants must already exist in the notebook namespace.
required_vars = ['IDX_ANOM_VAL', 'IDX_ANOM_TEST', 'X_ANOM_MM_NPY', 'DIMS_JSON']
missing_vars = [name for name in required_vars if name not in globals()]
if missing_vars:
    raise NameError(f"Variables no definidas: {missing_vars}. Ejecuta primero la celda 3 (carga de datos).")

# So must the dataset class and the loader factory.
_required_helpers = (
    ('NpyDataset', "NpyDataset no est√° definido. Ejecuta primero la celda 3 (carga de datos)."),
    ('make_dataloader', "make_dataloader no est√° definido. Ejecuta primero la celda 3 (carga de datos)."),
)
for _helper_name, _helper_error in _required_helpers:
    if _helper_name not in globals():
        raise NameError(_helper_error)

# Re-bind both helpers from the notebook namespace.
NpyDataset = globals()['NpyDataset']
make_dataloader = globals()['make_dataloader']

# Reload the anomaly split indices; a missing file yields an empty index array.
if not IDX_ANOM_VAL.exists():
    idx_anom_val = np.zeros((0,), dtype=np.int64)
    print(f"‚úó {IDX_ANOM_VAL.name}: NO EXISTE")
else:
    idx_anom_val = np.load(IDX_ANOM_VAL)
    print(f"‚úì {IDX_ANOM_VAL.name}: {len(idx_anom_val)} √≠ndices cargados")

if not IDX_ANOM_TEST.exists():
    idx_anom_test = np.zeros((0,), dtype=np.int64)
    print(f"‚úó {IDX_ANOM_TEST.name}: NO EXISTE")
else:
    idx_anom_test = np.load(IDX_ANOM_TEST)
    print(f"‚úì {IDX_ANOM_TEST.name}: {len(idx_anom_test)} √≠ndices cargados")

# Load X_anom_mm when the notebook has not defined it yet, or reload it when
# the array that is already defined turns out to be empty.
if 'X_anom_mm' in globals():
    X_anom_mm = globals()['X_anom_mm']
    _anom_is_empty = hasattr(X_anom_mm, 'shape') and X_anom_mm.shape[0] == 0
    if not _anom_is_empty:
        # Already loaded with data: nothing to do.
        print(f"‚úì X_anom_mm ya cargado: shape {X_anom_mm.shape}")
    elif X_ANOM_MM_NPY.exists():
        X_anom_mm = np.load(X_ANOM_MM_NPY)
        print(f"‚úì X_anom_mm recargado: shape {X_anom_mm.shape}")
    else:
        print(f"‚ö† {X_ANOM_MM_NPY.name}: NO EXISTE - X_anom_mm sigue vac√≠o")
elif X_ANOM_MM_NPY.exists():
    X_anom_mm = np.load(X_ANOM_MM_NPY)
    print(f"‚úì X_anom_mm cargado: shape {X_anom_mm.shape}")
else:
    # No anomaly file: build an empty array shaped from dims.json.
    if not DIMS_JSON.exists():
        raise FileNotFoundError(f"No se encontr√≥ {DIMS_JSON}. Ejecuta primero la celda 3.")
    with open(DIMS_JSON, "r", encoding="utf-8") as f:
        dims = json.load(f)
    T = int(dims["T"])
    C = int(dims["C"])
    X_anom_mm = np.zeros((0, T, C), dtype=np.float32)
    print(f"‚úó {X_ANOM_MM_NPY.name}: NO EXISTE - usando array vac√≠o (T={T}, C={C})")

print(f"X_anom_mm.shape: {X_anom_mm.shape}")

# Rebuild the anomaly datasets. A dataset is created only when there are
# indices, there are samples, and the largest index fits inside X_anom_mm.
ds_val_a = None
if len(idx_anom_val) == 0 or X_anom_mm.shape[0] == 0:
    if len(idx_anom_val) == 0:
        print("‚ö† No hay √≠ndices de an√≥malos para val")
    if X_anom_mm.shape[0] == 0:
        print("‚ö† X_anom_mm est√° vac√≠o")
elif np.max(idx_anom_val) >= X_anom_mm.shape[0]:
    print(f"‚ö† ERROR: √çndices val exceden tama√±o ({np.max(idx_anom_val)} >= {X_anom_mm.shape[0]})")
else:
    ds_val_a = NpyDataset(X_anom_mm, idx_anom_val)
    print(f"‚úì ds_val_a recreado: {len(ds_val_a)} muestras")

ds_test_a = None
if len(idx_anom_test) == 0 or X_anom_mm.shape[0] == 0:
    if len(idx_anom_test) == 0:
        print("‚ö† No hay √≠ndices de an√≥malos para test")
    if X_anom_mm.shape[0] == 0:
        print("‚ö† X_anom_mm est√° vac√≠o")
elif np.max(idx_anom_test) >= X_anom_mm.shape[0]:
    print(f"‚ö† ERROR: √çndices test exceden tama√±o ({np.max(idx_anom_test)} >= {X_anom_mm.shape[0]})")
else:
    ds_test_a = NpyDataset(X_anom_mm, idx_anom_test)
    print(f"‚úì ds_test_a recreado: {len(ds_test_a)} muestras")

# Rebuild the anomaly dataloaders. BATCH_SIZE is taken from the notebook
# namespace, then from TRAINING, and finally defaults to 64.
if 'BATCH_SIZE' not in globals():
    if 'TRAINING' in globals():
        BATCH_SIZE = TRAINING.batch_size
        print(f"‚úì BATCH_SIZE obtenido de TRAINING: {BATCH_SIZE}")
    else:
        BATCH_SIZE = 64  # default value
        print("‚ö† BATCH_SIZE no definido, usando 64 por defecto")

# A loader is reset to None whenever its dataset is unavailable. Previously
# the name kept a stale loader from an earlier cell (or stayed undefined),
# leaving it out of sync with ds_val_a / ds_test_a.
if ds_val_a is not None:
    dl_val_a = make_dataloader(ds_val_a, BATCH_SIZE, shuffle=False)
    print(f"‚úì dl_val_a recreado")
else:
    dl_val_a = None
if ds_test_a is not None:
    dl_test_a = make_dataloader(ds_test_a, BATCH_SIZE, shuffle=False)
    print(f"‚úì dl_test_a recreado")
else:
    dl_test_a = None

_rule = "=" * 60
print("\n" + _rule)
print("RESUMEN FINAL:")
print(_rule)

# Report every split whose dataset is present in the notebook namespace.
if 'ds_train' in globals():
    ds_train = globals()['ds_train']
    print(f"  Train normales: {len(ds_train)}")
if 'ds_val_n' in globals():
    ds_val_n = globals()['ds_val_n']
    print(f"  Val normales: {len(ds_val_n)} | Val an√≥malos: {0 if ds_val_a is None else len(ds_val_a)}")
if 'ds_test_n' in globals():
    ds_test_n = globals()['ds_test_n']
    print(f"  Test normales: {len(ds_test_n)} | Test an√≥malos: {0 if ds_test_a is None else len(ds_test_a)}")

# Success means at least one anomaly dataset could be built.
if ds_val_a is not None or ds_test_a is not None:
    print("\n‚úì An√≥malos cargados correctamente")
else:
    for _hint in (
        "\n‚ö† PROBLEMA: No se pudieron cargar an√≥malos",
        "  SOLUCI√ìN:",
        "  1. Ve a 01_ecg_preprocessing_demo.ipynb",
        "  2. Ejecuta la celda de train_valid_test_split()",
        "  3. Verifica que se generen los archivos idx_anom_val.npy y idx_anom_test.npy",
        "  4. Vuelve a ejecutar esta celda",
    ):
        print(_hint)
print(_rule)


CORRECCI√ìN: Recargando an√≥malos
‚úì idx_anom_val.npy: 100 √≠ndices cargados
‚úì idx_anom_test.npy: 900 √≠ndices cargados
‚úì X_anom_mm ya cargado: shape (1000, 5000, 3)
X_anom_mm.shape: (1000, 5000, 3)
‚úì ds_val_a recreado: 100 muestras
‚úì ds_test_a recreado: 900 muestras
‚úì dl_val_a recreado
‚úì dl_test_a recreado

RESUMEN FINAL:
  Train normales: 121284
  Val normales: 15160 | Val an√≥malos: 100
  Test normales: 15162 | Test an√≥malos: 900

‚úì An√≥malos cargados correctamente


In [27]:
# ========================================
# 🧠 Modelo — Autoencoder 1D CNN
# ========================================
import torch.nn as nn


class Cropping1D(nn.Module):
    """Force the last dimension of a tensor to ``target_len``.

    Longer inputs are center-cropped; shorter inputs are reflect-padded
    (any odd padding element goes to the right side).
    """

    def __init__(self, target_len: int):
        super().__init__()
        self.target_len = target_len

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        surplus = x.shape[-1] - self.target_len
        if surplus == 0:
            return x
        if surplus > 0:
            # Center crop: drop floor(surplus / 2) samples from the left.
            first = surplus // 2
            return x[..., first:first + self.target_len]
        # Too short: reflect-pad both sides.
        missing = -surplus
        pad_left = missing // 2
        pad_right = missing - pad_left
        return nn.functional.pad(x, (pad_left, pad_right), mode="reflect")


class AE1DCNN(nn.Module):
    """1D convolutional autoencoder.

    The encoder halves the temporal length twice with max-pooling; the
    decoder doubles it twice with nearest-neighbour upsampling and ends in a
    sigmoid, so outputs lie in (0, 1). The result is cropped or padded back
    to ``in_len`` by ``Cropping1D``.
    NOTE(review): the sigmoid presumably assumes inputs scaled to [0, 1] -
    confirm against the data preparation.
    """

    def __init__(self, cfg: ModelConfig, in_len: int):
        super().__init__()
        k1, k2, k3, k4 = cfg.kernels
        n_channels = cfg.in_channels
        narrow = cfg.base_filters
        wide = narrow * 2
        # One activation module instance is shared by every layer.
        act = nn.LeakyReLU(cfg.leak, inplace=True)

        def conv(c_from: int, c_to: int, k: int) -> nn.Conv1d:
            # Padding of k // 2 keeps the length unchanged for odd k.
            return nn.Conv1d(c_from, c_to, kernel_size=k, padding=k // 2)

        def halve() -> nn.MaxPool1d:
            return nn.MaxPool1d(kernel_size=2, stride=2)

        def double() -> nn.Upsample:
            return nn.Upsample(scale_factor=2, mode="nearest")

        # Layers are created in the same order as before so that seeded
        # weight initialisation is unchanged.
        encoder_layers = [
            conv(n_channels, wide, k1), act, halve(),
            conv(wide, narrow, k2), act, halve(),
            conv(narrow, narrow, k3), act,
            conv(narrow, narrow, k4), act,
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            double(), conv(narrow, wide, k3), act,
            double(), conv(wide, narrow, k2), act,
            conv(narrow, n_channels, k1), nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

        self.crop = Cropping1D(in_len)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return self.crop(reconstruction)


# Build the model on the selected device and report its parameter counts.
MODEL = AE1DCNN(CONFIG.model, in_len=T).to(DEVICE)
_param_sizes = [(p.numel(), p.requires_grad) for p in MODEL.parameters()]
PARAMS_TOTAL = sum(size for size, _ in _param_sizes)
PARAMS_TRAINABLE = sum(size for size, trainable in _param_sizes if trainable)
print(MODEL)
print(f"Par√°metros totales: {PARAMS_TOTAL/1e6:.3f} M | Entrenables: {PARAMS_TRAINABLE/1e6:.3f} M")



AE1DCNN(
  (encoder): Sequential(
    (0): Conv1d(3, 64, kernel_size=(11,), stride=(1,), padding=(5,))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(64, 32, kernel_size=(7,), stride=(1,), padding=(3,))
    (4): LeakyReLU(negative_slope=0.1, inplace=True)
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv1d(32, 32, kernel_size=(9,), stride=(1,), padding=(4,))
    (7): LeakyReLU(negative_slope=0.1, inplace=True)
    (8): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
    (9): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (decoder): Sequential(
    (0): Upsample(scale_factor=2.0, mode='nearest')
    (1): Conv1d(32, 64, kernel_size=(9,), stride=(1,), padding=(4,))
    (2): LeakyReLU(negative_slope=0.1, inplace=True)
    (3): Upsample(scale_factor=2.0, mode='nearest')
    (4): Conv1d(64, 32, kernel_size=(7,), stride=(1,),

In [28]:
# ========================================
# 📟 MLflow — tracking en sqlite + artefactos locales
# ========================================
import mlflow
from mlflow.tracking import MlflowClient

# Tracking database and artifact directory live next to the notebook folder.
PARENT_DIR = Path.cwd().parent.resolve()
TRACKING_DB = PARENT_DIR.joinpath("mlflow.db").resolve()
ARTIFACT_ROOT = PARENT_DIR.joinpath("mlflow_artifacts").resolve()
ARTIFACT_ROOT.mkdir(parents=True, exist_ok=True)

mlflow.set_tracking_uri(f"sqlite:///{TRACKING_DB.as_posix()}")
client = MlflowClient()

# Reuse the experiment when it exists; otherwise create it with the local
# artifact directory as its artifact location.
EXPERIMENT_NAME = CONFIG.mlflow.experiment_name
exp = client.get_experiment_by_name(EXPERIMENT_NAME)
if exp is not None:
    EXPERIMENT_ID = exp.experiment_id
else:
    EXPERIMENT_ID = client.create_experiment(EXPERIMENT_NAME, artifact_location=ARTIFACT_ROOT.as_uri())

# Close a run that was left active by an earlier execution.
if mlflow.active_run() is not None:
    print("Cerrando run previo colgado:", mlflow.active_run().info.run_id)
    mlflow.end_run()

print("Tracking URI:", mlflow.get_tracking_uri())
print("Experiment ID:", EXPERIMENT_ID)
print("Artifact root:", ARTIFACT_ROOT.as_uri())



Tracking URI: sqlite:///S:/Proyecto final/mlflow.db
Experiment ID: 2
Artifact root: file:///S:/Proyecto%20final/mlflow_artifacts


In [None]:
# ========================================
# 🔍 EVALUACIÓN MEJORADA: Búsqueda de umbral óptimo
# ========================================
#
# Esta celda importa el módulo de evaluación mejorado que implementa:
# - Búsqueda de umbral basada en percentiles
# - Evaluación completa con métricas detalladas
# - Visualización de resultados
#
# 📚 FUNCIONES DISPONIBLES:
#   - compute_reconstruction_errors_with_labels(): Calcula errores con etiquetas
#   - find_optimal_threshold(): Busca umbral óptimo basado en percentiles
#   - predict_with_threshold(): Predice etiquetas usando un umbral
#   - evaluate_test_set(): Evalúa en test con métricas completas
#   - full_evaluation_pipeline(): Pipeline completo de evaluación
#
# 📖 DOCUMENTACIÓN COMPLETA:
#   Ver el archivo evaluation_threshold_tuning.py para más detalles
#
# ========================================

import sys
from pathlib import Path

# Put the current working directory first on sys.path so the local
# evaluation_threshold_tuning module can be imported.
sys.path.insert(0, str(Path.cwd()))

from evaluation_threshold_tuning import (
    compute_reconstruction_errors_with_labels,
    evaluate_test_set,
    find_optimal_threshold,
    full_evaluation_pipeline,
    predict_with_threshold,
)

# Confirmation banner listing the helpers that were just imported.
print("‚úì M√≥dulo de evaluaci√≥n importado correctamente")
print("\nFunciones disponibles:")
print("  - compute_reconstruction_errors_with_labels(): Calcula errores con etiquetas")
print("  - find_optimal_threshold(): Busca umbral √≥ptimo basado en percentiles")
print("  - predict_with_threshold(): Predice etiquetas usando un umbral")
print("  - evaluate_test_set(): Eval√∫a en test con m√©tricas completas")
print("  - full_evaluation_pipeline(): Pipeline completo de evaluaci√≥n")


‚úì M√≥dulo de evaluaci√≥n importado correctamente

Funciones disponibles:
  - compute_reconstruction_errors_with_labels(): Calcula errores con etiquetas
  - find_optimal_threshold(): Busca umbral √≥ptimo basado en percentiles
  - predict_with_threshold(): Predice etiquetas usando un umbral
  - evaluate_test_set(): Eval√∫a en test con m√©tricas completas
  - full_evaluation_pipeline(): Pipeline completo de evaluaci√≥n


In [None]:
# ========================================
# üéØ B√öSQUEDA DE UMBRAL √ìPTIMO EN VALIDACI√ìN
# ========================================
#
# ‚ö†Ô∏è IMPORTANTE: ESTA ES LA CELDA DONDE SE DEFINE EL UMBRAL PARA DETECTAR ANOMAL√çAS
#
# Esta celda busca autom√°ticamente el mejor umbral basado en:
# 1. Percentiles del error de reconstrucci√≥n en validaci√≥n
# 2. Maximizaci√≥n del F2-score (da m√°s peso al recall de anomal√≠as)
# 3. Filtro por FPR m√°ximo (evita demasiados falsos positivos)
#
# üìù C√ìMO CAMBIAR EL UMBRAL/PERCENTIL PARA DETECTAR ANOMAL√çAS:
#
# OPCI√ìN 1: Cambiar los percentiles a probar
#   - Modifica la lista: percentiles=[80, 85, 90, 92, 94, 96, 98, 99]
#   - Percentiles m√°s altos = umbral m√°s alto = menos falsos positivos
#   - Percentiles m√°s bajos = umbral m√°s bajo = detecta m√°s anomal√≠as
#
# OPCI√ìN 2: Cambiar el FPR m√°ximo permitido
#   - Modifica: max_fpr=0.05 (5% de falsos positivos m√°ximo)
#   - max_fpr m√°s bajo = umbral m√°s alto = menos falsos positivos
#   - max_fpr m√°s alto = umbral m√°s bajo = m√°s detecciones
#
# OPCI√ìN 3: Usar un umbral fijo manual
#   - Comenta la b√∫squeda autom√°tica
#   - Define: BEST_THR = 0.0001  # Tu valor fijo
#   - El modelo clasificar√° como an√≥malo si error > BEST_THR
#
# üìä RESULTADO:
#   - BEST_THR: Umbral √≥ptimo seleccionado (se usa en la siguiente celda)
#   - best_val_metrics: M√©tricas en validaci√≥n con ese umbral
#   - df_thresholds: Tabla con todos los umbrales probados
#
# ========================================

# Load the best checkpoint produced by training; fail early with an
# actionable message if training has not been run yet.
best_ckpt = OUTPUT_ROOT / "ae_best.pt"
if not best_ckpt.exists():
    raise FileNotFoundError(f"No se encontr√≥ el checkpoint: {best_ckpt}. Ejecuta primero el entrenamiento.")

MODEL.load_state_dict(torch.load(best_ckpt, map_location=DEVICE))
MODEL.to(DEVICE)
MODEL.eval()

print("=" * 80)
print("B√öSQUEDA DE UMBRAL √ìPTIMO")
print("=" * 80)
print(f"Modelo cargado desde: {best_ckpt}")
print(f"Dispositivo: {DEVICE}")

# Reconstruction errors on the validation split (normal + anomalous loaders).
# NOTE(review): the summary prints below treat label 0 as normal and 1 as
# anomalous; confirm against compute_reconstruction_errors_with_labels.
print("\nCalculando errores de reconstrucci√≥n en validaci√≥n...")
val_errors, val_labels = compute_reconstruction_errors_with_labels(
    model=MODEL,
    normal_loader=dl_val_n,
    anomalous_loader=dl_val_a,
    device=DEVICE,
)

print(f"‚úì Errores calculados: {len(val_errors)} muestras")
print(f"  - Normales (0): {(val_labels == 0).sum()}")
print(f"  - An√≥malos (1): {(val_labels == 1).sum()}")
print(f"  - Error m√≠nimo: {val_errors.min():.6f}")
print(f"  - Error m√°ximo: {val_errors.max():.6f}")
print(f"  - Error promedio: {val_errors.mean():.6f}")

# Search for the threshold; BEST_THR is consumed by the test-evaluation cell.
print("\nBuscando umbral √≥ptimo basado en percentiles...")
BEST_THR, best_val_metrics, df_thresholds = find_optimal_threshold(
    val_errors=val_errors,
    val_labels=val_labels,
    percentiles=[80, 85, 90, 92, 94, 96, 98, 99],  # Candidate percentiles to try
    max_fpr=0.05,  # Maximum allowed FPR (5%)
    verbose=True,
)

print(f"\n‚úì Mejor umbral seleccionado: {BEST_THR:.6f}")
print(f"  Este umbral se usar√° para evaluar en test.")

# Persist the table of evaluated thresholds next to the checkpoint.
if OUTPUT_ROOT:
    thresholds_path = OUTPUT_ROOT / "threshold_search_results.csv"
    df_thresholds.to_csv(thresholds_path, index=False)
    print(f"\n‚úì Resultados de b√∫squeda guardados en: {thresholds_path}")


B√öSQUEDA DE UMBRAL √ìPTIMO
Modelo cargado desde: S:\Proyecto final\data\combined_ptbxl_mimic_500hz_iv1v5\ae_best.pt
Dispositivo: cuda

Calculando errores de reconstrucci√≥n en validaci√≥n...
‚úì Errores calculados: 15260 muestras
  - Normales (0): 15160
  - An√≥malos (1): 100
  - Error m√≠nimo: 0.000012
  - Error m√°ximo: 0.002433
  - Error promedio: 0.000032

Buscando umbral √≥ptimo basado en percentiles...

B√öSQUEDA DE UMBRAL √ìPTIMO

Candidatos evaluados: 8 umbrales
Filtro aplicado: FPR <= 0.05

Resultados por umbral:
--------------------------------------------------------------------------------
 threshold  recall_anom  precision_anom  fpr_normal  f2_score
  0.000056     0.330000        0.054010    0.038127  0.163205
  0.000068     0.270000        0.088235    0.018404  0.191218
  0.000091     0.200000        0.130719    0.008773  0.180832
--------------------------------------------------------------------------------

‚úì Mejor umbral seleccionado: 0.000068
  - F2-score: 0.1912

In [None]:
# ========================================
# üìä EVALUACI√ìN FINAL EN TEST CON UMBRAL √ìPTIMO
# ========================================
#
# Esta celda eval√∫a el modelo en el conjunto de TEST usando el umbral √≥ptimo
# encontrado en la celda anterior (BEST_THR).
#
# ‚ö†Ô∏è IMPORTANTE: Si quieres usar un umbral diferente al encontrado autom√°ticamente:
#   1. Define manualmente: BEST_THR = 0.0001  # Tu valor personalizado
#   2. O modifica BEST_THR despu√©s de la celda 10
#
# üìä M√âTRICAS CALCULADAS:
#   - Accuracy: Precisi√≥n general
#   - Precision/Recall: Para clase normal y an√≥mala
#   - F1-score y F2-score: Para clase an√≥mala (F2 da m√°s peso al recall)
#   - Specificity (TNR): Tasa de verdaderos negativos
#   - FPR: Tasa de falsos positivos
#   - AUROC y AUPRC: M√©tricas de ranking
#
# üìÅ ARCHIVOS GENERADOS:
#   - confusion_matrix_test.png: Visualizaci√≥n de la matriz de confusi√≥n
#   - test_metrics.csv: Todas las m√©tricas en formato CSV
#
# ========================================

print("\n" + "=" * 80)
print("EVALUACI√ìN EN CONJUNTO DE TEST")
print("=" * 80)
print(f"Usando umbral √≥ptimo: {BEST_THR:.6f}")

# Evaluate on the test split with the threshold chosen on validation.
test_metrics = evaluate_test_set(
    model=MODEL,
    test_normal_loader=dl_test_n,
    test_anomalous_loader=dl_test_a,
    device=DEVICE,
    threshold=BEST_THR,
    output_dir=OUTPUT_ROOT,
    verbose=True,
)

# Best-effort logging of the threshold and the scalar test metrics to MLflow;
# any failure is reported but never aborts the evaluation.
try:
    import mlflow
    import time

    if mlflow.active_run() is None:
        # No run is open: create a dedicated evaluation run.
        mlflow.set_tracking_uri(f"sqlite:///{(Path.cwd().parent / 'mlflow.db').as_posix()}")
        with mlflow.start_run(run_name=f"eval_threshold_{int(time.time())}", experiment_id=EXPERIMENT_ID):
            mlflow.log_param("best_threshold", BEST_THR)
            mlflow.log_param("threshold_search_method", "percentile_based_f2_maximization")
            # Only entries that are plain int/float instances are logged.
            mlflow.log_metrics({f"test_{k}": float(v) for k, v in test_metrics.items() if isinstance(v, (int, float))})
            print("\n‚úì M√©tricas guardadas en MLflow")
    else:
        # A run is already open: log into it (the search-method param is not
        # logged on this path).
        mlflow.log_param("best_threshold", BEST_THR)
        mlflow.log_metrics({f"test_{k}": float(v) for k, v in test_metrics.items() if isinstance(v, (int, float))})
        print("\n‚úì M√©tricas guardadas en MLflow")
except Exception as e:
    print(f"\n‚ö† No se pudo guardar en MLflow: {e}")

print("\n" + "=" * 80)
print("‚úÖ EVALUACI√ìN COMPLETA FINALIZADA")
print("=" * 80)



EVALUACI√ìN EN CONJUNTO DE TEST
Usando umbral √≥ptimo: 0.000068


2025/11/14 16:05:01 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/11/14 16:05:01 INFO mlflow.store.db.utils: Updating database tables
2025-11-14 16:05:01 INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
2025-11-14 16:05:01 INFO  [alembic.runtime.migration] Will assume non-transactional DDL.



EVALUACI√ìN EN CONJUNTO DE TEST

Umbral utilizado: 0.000068

Muestras totales: 16062
  - Normales (0): 15162
  - An√≥malos (1): 900

--------------------------------------------------------------------------------
MATRIZ DE CONFUSI√ìN
--------------------------------------------------------------------------------

                Pred: Normal    Pred: An√≥malo  
Real: Normal    14867           295            
Real: An√≥malo   669             231            

--------------------------------------------------------------------------------
M√âTRICAS DE CLASIFICACI√ìN
--------------------------------------------------------------------------------

M√©tricas generales:
  Accuracy:           0.939983
  Specificity (TNR):  0.980543
  FPR:                0.019457

M√©tricas para clase NORMAL (0):
  Precision:          0.956939
  Recall:             0.980543
  F1-score:           0.968597

M√©tricas para clase AN√ìMALA (1):
  Precision:          0.439163
  Recall:             0.256667
  F1-

2025-11-14 16:05:01 INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
2025-11-14 16:05:01 INFO  [alembic.runtime.migration] Will assume non-transactional DDL.



‚úì M√©tricas guardadas en MLflow

‚úÖ EVALUACI√ìN COMPLETA FINALIZADA


In [32]:
# ========================================
# üöÄ ALTERNATIVA: Pipeline completo en una sola llamada
# ========================================
# To run the whole evaluation in a single call, remove the triple quotes that
# wrap the block below: it is a string literal, not commented-out code, so
# there is nothing to "uncomment".
"""
best_threshold, val_metrics, test_metrics = full_evaluation_pipeline(
    model=MODEL,
    val_normal_loader=dl_val_n,
    val_anomalous_loader=dl_val_a,
    test_normal_loader=dl_test_n,
    test_anomalous_loader=dl_test_a,
    device=DEVICE,
    percentiles=[80, 85, 90, 92, 94, 96, 98, 99],
    max_fpr=0.05,
    output_dir=OUTPUT_ROOT,
    verbose=True,
)

print(f"\n‚úì Pipeline completo ejecutado")
print(f"  Umbral √≥ptimo: {best_threshold:.6f}")
print(f"  F2-score en test: {test_metrics['f2_anom']:.6f}")
"""

print("üí° Para usar el pipeline completo, descomenta el c√≥digo en esta celda.")


üí° Para usar el pipeline completo, descomenta el c√≥digo en esta celda.


In [33]:
# ========================================
# üèãÔ∏è Entrenamiento ‚Äî Prefect + MLflow
# ========================================
import time
from copy import deepcopy

import matplotlib
import matplotlib.pyplot as plt
import mlflow.pytorch
import numpy as np
from prefect import task, flow
from prefect.tasks import NO_CACHE

# Select the Agg backend; figures in this cell are only written to files
# via savefig.
matplotlib.use("Agg")

# Lower-cased name of the configured loss; read by loss_function().
LOSS_FN = TRAINING.loss_fn.lower()


def loss_function(pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """Return the reconstruction loss selected by the module-level ``LOSS_FN``.

    Only ``"mse"`` is supported; any other value raises ``NotImplementedError``.
    """
    if LOSS_FN != "mse":
        raise NotImplementedError(f"Funci√≥n de p√©rdida no soportada: {LOSS_FN}")
    return nn.functional.mse_loss(pred, target)


def flatten_dict(d: dict, parent_key: str = "", sep: str = "__") -> dict:
    """Flatten nested dictionaries into a single level.

    Keys of nested dicts are joined to their parent's key with ``sep``;
    top-level keys are converted with ``str``. Non-dict values (lists
    included) are stored untouched, and empty nested dicts contribute
    no entries.
    """
    flat: dict = {}
    for key, value in d.items():
        path = f"{parent_key}{sep}{key}" if parent_key else str(key)
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse with the accumulated path as the new prefix.
        flat.update(flatten_dict(value, path, sep=sep))
    return flat


def collect_run_params() -> dict:
    """Gather the run configuration as a nested, plain dictionary.

    Sections: ``training``, ``model`` and ``threshold`` (dataclasses converted
    with ``asdict``), ``paths`` (output root as a string) and ``dims``
    (the module-level ``T`` and ``C``).
    """
    run_params: dict = {}
    run_params["training"] = asdict(TRAINING)
    run_params["model"] = asdict(CONFIG.model)
    run_params["threshold"] = asdict(CONFIG.threshold)
    run_params["paths"] = {"output_root": str(OUTPUT_ROOT)}
    run_params["dims"] = {"T": T, "C": C}
    return run_params


@task(name="train_epoch", log_prints=False, cache_policy=NO_CACHE)
def train_epoch(model: AE1DCNN, loader: DataLoader, optimizer: torch.optim.Optimizer) -> float:
    """Run one optimisation pass over ``loader`` and return the mean loss.

    The mean is weighted by batch size. The epoch is aborted and ``nan`` is
    returned as soon as a batch, its reconstruction, or its loss contains a
    non-finite value. Gradients are clipped when ``TRAINING.clip_grad`` is set.
    """
    model.train()
    device = next(model.parameters()).device
    async_copy = device.type == "cuda"
    running_loss = 0.0
    seen = 0

    for batch in loader:
        batch = batch.to(device, non_blocking=async_copy)

        # Guard: input batch must be finite.
        if not torch.isfinite(batch).all():
            print("‚ö†Ô∏è ADVERTENCIA: Datos de entrada contienen valores no finitos")
            return float("nan")

        optimizer.zero_grad(set_to_none=True)
        rebuilt = model(batch)

        # Guard: model output must be finite.
        if not torch.isfinite(rebuilt).all():
            print("‚ö†Ô∏è ADVERTENCIA: Reconstrucci√≥n contiene valores no finitos")
            return float("nan")

        loss = loss_function(rebuilt, batch)

        # Guard: loss must be finite before back-propagating.
        if not torch.isfinite(loss):
            print(f"‚ö†Ô∏è ADVERTENCIA: P√©rdida no finita: {loss.item()}")
            return float("nan")

        loss.backward()
        if TRAINING.clip_grad is not None:
            nn.utils.clip_grad_norm_(model.parameters(), TRAINING.clip_grad)
        optimizer.step()

        batch_size = batch.size(0)
        running_loss += loss.item() * batch_size
        seen += batch_size

    # An empty loader yields 0.0 rather than a division by zero.
    mean_loss = running_loss / max(1, seen)
    if not np.isfinite(mean_loss):
        print(f"‚ö†Ô∏è ADVERTENCIA: P√©rdida promedio no finita: {mean_loss}")
    return mean_loss


@task(name="eval_epoch", log_prints=False, cache_policy=NO_CACHE)
def eval_epoch(model: AE1DCNN, loader: DataLoader) -> float:
    """Compute the batch-size-weighted mean loss over ``loader`` without gradients.

    Returns ``nan`` as soon as a batch, its reconstruction, or its loss
    contains a non-finite value.
    """
    model.eval()
    device = next(model.parameters()).device
    async_copy = device.type == "cuda"
    running_loss = 0.0
    seen = 0

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device, non_blocking=async_copy)

            # Guard: input batch must be finite.
            if not torch.isfinite(batch).all():
                print("‚ö†Ô∏è ADVERTENCIA: Datos de validaci√≥n contienen valores no finitos")
                return float("nan")

            rebuilt = model(batch)

            # Guard: model output must be finite.
            if not torch.isfinite(rebuilt).all():
                print("‚ö†Ô∏è ADVERTENCIA: Reconstrucci√≥n de validaci√≥n contiene valores no finitos")
                return float("nan")

            loss = loss_function(rebuilt, batch)

            # Guard: loss must be finite.
            if not torch.isfinite(loss):
                print(f"‚ö†Ô∏è ADVERTENCIA: P√©rdida de validaci√≥n no finita: {loss.item()}")
                return float("nan")

            batch_size = batch.size(0)
            running_loss += loss.item() * batch_size
            seen += batch_size

    # An empty loader yields 0.0 rather than a division by zero.
    mean_loss = running_loss / max(1, seen)
    if not np.isfinite(mean_loss):
        print(f"‚ö†Ô∏è ADVERTENCIA: P√©rdida promedio de validaci√≥n no finita: {mean_loss}")
    return mean_loss


@flow(name="train_autoencoder_1d", log_prints=True)
def train_flow() -> dict:
    """Train the autoencoder and record the run in MLflow.

    Steps: seed everything, build the model on ``DEVICE`` (falling back to CPU
    if a CUDA warm-up forward pass fails), sanity-check one training and one
    validation batch, then train for ``TRAINING.epochs`` epochs while logging
    per-epoch losses and saving a checkpoint whenever validation loss improves.

    Side effects:
        * Rebinds the module-level ``DEVICE`` to the device actually used.
        * Writes ``run_params.json``, ``ae_best.pt`` and ``loss_curves.png``
          under ``OUTPUT_ROOT`` and logs them as MLflow artifacts.

    Returns:
        dict with ``run_id``, ``best_val`` (``inf`` if no checkpoint was saved
        in this run), ``best_ckpt``, ``curves_png`` and ``device_used``.
    """
    global DEVICE
    set_seed_everywhere(TRAINING.seed)
    model = AE1DCNN(CONFIG.model, in_len=T).to(DEVICE)
    local_device = DEVICE

    # Warm-up forward pass to surface CUDA problems before training starts.
    # str() keeps the check working whether DEVICE is a str or a torch.device.
    if str(DEVICE).startswith("cuda"):
        try:
            with torch.no_grad():
                dummy = torch.randn(1, CONFIG.model.in_channels, T, device=DEVICE)
                _ = model(dummy)
        except Exception as err:
            print("‚ö†Ô∏è CUDA no respondi√≥ (", err, ") -> cambiando a CPU")
            model = model.to("cpu")
            local_device = "cpu"

    DEVICE = local_device

    optimizer = torch.optim.Adam(model.parameters(), lr=TRAINING.lr, weight_decay=TRAINING.weight_decay)

    # Sanity check on one training batch (diagnostic only; never aborts).
    print("Verificando datos de entrenamiento...")
    sample_batch = next(iter(dl_train))
    print(f"  Shape del batch: {sample_batch.shape}")
    print(f"  Min: {sample_batch.min().item():.6f}, Max: {sample_batch.max().item():.6f}")
    print(f"  Mean: {sample_batch.mean().item():.6f}, Std: {sample_batch.std().item():.6f}")

    if not torch.isfinite(sample_batch).all():
        print("  ‚ö†Ô∏è ADVERTENCIA: El batch contiene valores no finitos (NaN o Inf)")
        nan_count = (~torch.isfinite(sample_batch)).sum().item()
        print(f"  Valores no finitos: {nan_count} de {sample_batch.numel()}")
        print("  PROBLEMA: Los datos contienen valores no finitos. Esto causar√° NaN en el entrenamiento.")
        print("  SOLUCI√ìN: Verifica que los datos est√©n correctamente normalizados.")
        print("  Los datos deber√≠an estar en el rango [0, 1] despu√©s de la normalizaci√≥n min-max.")
        print("  Revisa el pipeline de preprocesamiento en 01_ecg_preprocessing_demo.ipynb")
    else:
        print("  ‚úì Datos de entrenamiento son finitos")

    # Same sanity check on one validation batch.
    print("\nVerificando datos de validaci√≥n...")
    val_sample_batch = next(iter(dl_val_n))
    print(f"  Shape del batch: {val_sample_batch.shape}")
    print(f"  Min: {val_sample_batch.min().item():.6f}, Max: {val_sample_batch.max().item():.6f}")
    print(f"  Mean: {val_sample_batch.mean().item():.6f}, Std: {val_sample_batch.std().item():.6f}")

    if not torch.isfinite(val_sample_batch).all():
        print("  ‚ö†Ô∏è ADVERTENCIA: El batch de validaci√≥n contiene valores no finitos (NaN o Inf)")
        nan_count = (~torch.isfinite(val_sample_batch)).sum().item()
        print(f"  Valores no finitos: {nan_count} de {val_sample_batch.numel()}")
        print("  PROBLEMA: Los datos contienen valores no finitos. Esto causar√° NaN en el entrenamiento.")
        print("  SOLUCI√ìN: Verifica que los datos est√©n correctamente normalizados.")
    else:
        print("  ‚úì Datos de validaci√≥n son finitos")

    # Range check: warn when the (finite) training batch leaves [-1, 1].
    if torch.isfinite(sample_batch).all():
        if sample_batch.min() < -1.0 or sample_batch.max() > 1.0:
            print(f"\n  ‚ö†Ô∏è ADVERTENCIA: Datos fuera del rango esperado [-1, 1]")
            print(f"     Rango actual: [{sample_batch.min().item():.6f}, {sample_batch.max().item():.6f}]")
            print(f"     Los datos deber√≠an estar normalizados entre 0 y 1 (min-max) o -1 y 1 (z-score)")

    # The output directory must exist BEFORE anything is written into it
    # (previously run_params.json was written first and failed on a fresh dir).
    OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)

    params_dict = collect_run_params()
    params_path = OUTPUT_ROOT / "run_params.json"
    with open(params_path, "w", encoding="utf-8") as f:
        json.dump(params_dict, f, indent=2, ensure_ascii=False)

    best_ckpt = OUTPUT_ROOT / "ae_best.pt"
    curves_png = OUTPUT_ROOT / "loss_curves.png"

    with mlflow.start_run(run_name=f"train_{int(time.time())}", experiment_id=EXPERIMENT_ID) as run:
        run_id = run.info.run_id
        print("MLflow run:", run_id)
        print("Usando dispositivo:", local_device)

        # MLflow params are flat key/value pairs; non-primitive values are
        # stringified.
        flat_params = flatten_dict(params_dict)
        for k, v in flat_params.items():
            if isinstance(v, (str, int, float, bool)):
                mlflow.log_param(k, v)
            else:
                mlflow.log_param(k, str(v))
        mlflow.log_artifact(str(params_path), artifact_path="config")

        train_losses: list[float] = []
        val_losses: list[float] = []
        best_val = float("inf")

        for epoch in range(1, TRAINING.epochs + 1):
            # Call the undecorated functions (.fn) instead of the Prefect tasks
            # to avoid serialisation problems with the model and loaders.
            tr_loss = train_epoch.fn(model, dl_train, optimizer)
            va_loss = eval_epoch.fn(model, dl_val_n)

            train_losses.append(tr_loss)
            val_losses.append(va_loss)

            mlflow.log_metrics(
                {
                    "recon_mse_train": float(tr_loss),
                    "recon_mse_val": float(va_loss),
                },
                step=epoch,
            )
            print(f"Epoch {epoch:03d}/{TRAINING.epochs} | train={tr_loss:.6f} | val={va_loss:.6f}")

            # Warn on non-finite losses but keep training.
            if not (np.isfinite(tr_loss) and np.isfinite(va_loss)):
                print(f"‚ö†Ô∏è ADVERTENCIA: P√©rdidas no finitas en epoch {epoch} (train={tr_loss}, val={va_loss})")
                print("   Esto puede indicar problemas con los datos o el modelo.")

            # Checkpoint whenever a finite validation loss improves on the best
            # so far (same condition the two original branches reduced to).
            if np.isfinite(va_loss) and va_loss < best_val:
                best_val = va_loss
                torch.save(model.state_dict(), best_ckpt)
                mlflow.log_artifact(str(best_ckpt), artifact_path="checkpoints")

        # Loss curves.
        plt.figure(figsize=(7, 4))
        plt.plot(train_losses, label="train MSE")
        plt.plot(val_losses, label="val MSE")
        plt.xlabel("Epoch")
        plt.ylabel("Reconstr. MSE")
        plt.legend()
        plt.tight_layout()
        plt.savefig(curves_png, dpi=130)
        plt.close()
        mlflow.log_artifact(str(curves_png), artifact_path="plots")

        # best_val only leaves +inf when a checkpoint was written in THIS run.
        # Checking just best_ckpt.exists() could reload a stale file left by a
        # previous run and log it as this run's model.
        saved_this_run = bool(np.isfinite(best_val))
        if saved_this_run and best_ckpt.exists():
            best_state = torch.load(best_ckpt, map_location="cpu")
            model.load_state_dict(best_state)
        else:
            print("‚ö†Ô∏è ADVERTENCIA: No se guard√≥ ning√∫n checkpoint (best_ckpt no existe)")
            print("   Esto puede deberse a que todas las p√©rdidas fueron NaN o no hubo mejora.")

        # Log the model in MLflow format: best weights if available, otherwise
        # the current weights.
        model.eval()
        mlflow.pytorch.log_model(model, artifact_path="pytorch_model")

    return {
        "run_id": run_id,
        "best_val": best_val,
        "best_ckpt": str(best_ckpt),
        "curves_png": str(curves_png),
        "device_used": local_device,
    }


# Execute the training flow and show the summary dict it returns.
train_summary = train_flow()
print("Resumen entrenamiento:")
pprint(train_summary)



2025-11-14 16:05:44 INFO  [prefect.flow_runs] Beginning flow run 'imposing-swift' for flow 'train_autoencoder_1d'
2025-11-14 16:05:44 INFO  [prefect.flow_runs] Verificando datos de entrenamiento...
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   Shape del batch: torch.Size([64, 3, 5000])
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   Min: 0.000000, Max: 1.000000
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   Mean: 0.410954, Std: 0.291378
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   ‚úì Datos de entrenamiento son finitos
2025-11-14 16:05:44 INFO  [prefect.flow_runs] 
Verificando datos de validaci√≥n...
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   Shape del batch: torch.Size([64, 3, 5000])
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   Min: 0.000000, Max: 1.000000
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   Mean: 0.406845, Std: 0.292782
2025-11-14 16:05:44 INFO  [prefect.flow_runs]   ‚úì Datos de validaci√≥n son finitos
2025-11-14 16:05:44 INFO  [prefect.flow_runs] MLflow ru

Resumen entrenamiento:
{'best_ckpt': 'S:\\Proyecto '
              'final\\data\\combined_ptbxl_mimic_500hz_iv1v5\\ae_best.pt',
 'best_val': 2.8826565134856912e-05,
 'curves_png': 'S:\\Proyecto '
               'final\\data\\combined_ptbxl_mimic_500hz_iv1v5\\loss_curves.png',
 'device_used': 'cuda',
 'run_id': 'eace69ca2e9c486cb7e20abdbdf2b8d1'}


In [34]:
# ========================================
# üìà Evaluaci√≥n ‚Äî reconstrucci√≥n + detecci√≥n anomal√≠as
# ========================================
from sklearn.metrics import (
    accuracy_score,
    average_precision_score,
    balanced_accuracy_score,
    classification_report,
    confusion_matrix,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    recall_score,
    f1_score,
    r2_score,
    roc_auc_score,
)


def reconstruction_errors(model: AE1DCNN, loader: DataLoader) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Reconstruct every batch in ``loader`` and collect the results on CPU.

    Returns a tuple ``(errors, inputs, reconstructions)`` of NumPy arrays
    concatenated along the first axis; ``errors`` is the squared error
    averaged over dims 1 and 2 of each sample.
    """
    model.eval()
    device = next(model.parameters()).device
    async_copy = device.type == "cuda"

    per_sample_mse: list = []
    inputs: list = []
    outputs: list = []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device, non_blocking=async_copy)
            rebuilt = model(batch)
            squared_diff = (rebuilt - batch) ** 2
            per_sample_mse.append(torch.mean(squared_diff, dim=(1, 2)).cpu().numpy())
            inputs.append(batch.cpu().numpy())
            outputs.append(rebuilt.cpu().numpy())

    return (
        np.concatenate(per_sample_mse),
        np.concatenate(inputs),
        np.concatenate(outputs),
    )


def basic_metrics(x_true: np.ndarray, x_pred: np.ndarray) -> dict:
    """Compute MAE, MSE, RMSE and R2 between two arrays after flattening both."""
    truth = x_true.reshape(-1)
    recon = x_pred.reshape(-1)
    mse = mean_squared_error(truth, recon)

    metrics = {"mae": mean_absolute_error(truth, recon)}
    metrics["mse"] = mse
    metrics["rmse"] = float(np.sqrt(mse))
    metrics["r2"] = r2_score(truth, recon)
    return metrics


def pick_threshold(errs_norm: np.ndarray) -> tuple[float, str]:
    """Derive the anomaly threshold from the errors in ``errs_norm``.

    Uses ``mean + k_std * std`` when ``CONFIG.threshold.use_mean_std`` is set,
    otherwise the configured percentile. Returns ``(threshold, label)`` where
    ``label`` names the rule that was applied.
    """
    cfg = CONFIG.threshold
    if not cfg.use_mean_std:
        cutoff = float(np.percentile(errs_norm, cfg.percentile))
        return cutoff, f"p{cfg.percentile}"

    center = float(np.mean(errs_norm))
    spread = float(np.std(errs_norm))
    return center + cfg.k_std * spread, f"mean+{cfg.k_std}*std"


def log_confusion_artifacts(cm: np.ndarray, labels: list[str], tag: str) -> None:
    """Save a confusion matrix as CSV and PNG and log both files to MLflow.

    The files are written to ``OUTPUT_ROOT`` as ``cm_<tag>.csv`` and
    ``cm_<tag>.png`` and then logged under the artifact path ``eval_<tag>``
    of the active MLflow run.

    Args:
        cm: Confusion matrix; rows are labelled "Real", columns "Pred".
        labels: Class names, in the same order as the rows/columns of ``cm``.
        tag: Suffix identifying the evaluated split in file names, the plot
            title and the MLflow artifact path.
    """
    row_names = [f"Real:{name}" for name in labels]
    col_names = [f"Pred:{name}" for name in labels]
    csv_path = OUTPUT_ROOT / f"cm_{tag}.csv"
    png_path = OUTPUT_ROOT / f"cm_{tag}.png"

    # Tabular copy of the matrix.
    pd.DataFrame(cm, index=row_names, columns=col_names).to_csv(csv_path)

    # Heat-map rendering with the raw count printed in every cell.
    fig, ax = plt.subplots(figsize=(5, 4))
    heatmap = ax.imshow(cm, interpolation="nearest", cmap="Blues")
    ax.figure.colorbar(heatmap, ax=ax)
    tick_positions = np.arange(len(labels))
    ax.set(xticks=tick_positions, yticks=tick_positions)
    ax.set_xticklabels(col_names)
    ax.set_yticklabels(row_names)
    ax.set_xlabel("Predicci√≥n")
    ax.set_ylabel("Real")
    ax.set_title(f"Matriz de confusi√≥n ({tag})")

    # Cells above half of the maximum get white text, the rest black text.
    midpoint = cm.max() / 2.0
    for row, col in np.ndindex(cm.shape[0], cm.shape[1]):
        cell = cm[row, col]
        text_color = "white" if cell > midpoint else "black"
        ax.text(col, row, f"{cell}", ha="center", va="center", color=text_color)

    fig.tight_layout()
    fig.savefig(png_path, dpi=140)
    plt.close(fig)

    artifact_dir = f"eval_{tag}"
    for produced in (csv_path, png_path):
        mlflow.log_artifact(str(produced), artifact_path=artifact_dir)


# ---------------------------------------------------------------------------
# Evaluation script: reload the best checkpoint, score the validation and test
# splits (reconstruction quality + threshold-based anomaly detection) and log
# metrics, confusion matrices, reports and example plots to one MLflow run.
# ---------------------------------------------------------------------------
best_ckpt = OUTPUT_ROOT / "ae_best.pt"
state_dict = torch.load(best_ckpt, map_location=DEVICE)
MODEL.load_state_dict(state_dict)
MODEL.to(DEVICE)
MODEL.eval()

# Class ids are passed explicitly to scikit-learn so that every output keeps a
# fixed two-class layout, even when a split contains (or the threshold
# predicts) a single class.
_CLASS_IDS = [0, 1]
_CLASS_NAMES = ["normal", "anomalo"]
# Only used to convert sample indices to seconds on the example plots.
_SAMPLING_RATE_HZ = 500.0


def safe_metric(fn, *args, **kwargs):
    """Return ``float(fn(*args, **kwargs))``, or NaN if the call raises."""
    try:
        return float(fn(*args, **kwargs))
    except Exception:
        # Deliberately best-effort: a metric that cannot be computed must not
        # abort the whole evaluation run.
        return float("nan")


def _labels_and_scores(err_normal, err_anomalous):
    """Build integer 0/1 labels (normal/anomalous) and the matching scores."""
    y_true = np.concatenate(
        [
            np.zeros(len(err_normal), dtype=int),
            np.ones(len(err_anomalous), dtype=int),
        ]
    )
    y_score = np.concatenate([err_normal, err_anomalous])
    return y_true, y_score


def _detection_metrics(y_true, y_pred, y_score, cm):
    """Compute the binary anomaly-detection metrics logged for one split."""
    tn, fp, _, _ = cm.ravel()
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        # max(1, ...) avoids a division by zero when there are no negatives.
        "specificity": tn / max(1, tn + fp),
        "f1": f1_score(y_true, y_pred, zero_division=0),
        "balanced_accuracy": balanced_accuracy_score(y_true, y_pred),
        "auroc": safe_metric(roc_auc_score, y_true, y_score),
        "auprc": safe_metric(average_precision_score, y_true, y_score),
    }


def _write_classification_report(y_true, y_pred, tag):
    """Write the text classification report for ``tag``; return (text, path)."""
    # Without ``labels`` the report raises ValueError when only one class is
    # present, because ``target_names`` always lists two names.
    report = classification_report(
        y_true,
        y_pred,
        labels=_CLASS_IDS,
        target_names=_CLASS_NAMES,
        digits=4,
        zero_division=0,
    )
    report_path = OUTPUT_ROOT / f"classification_report_{tag}.txt"
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)
    return report, report_path


with mlflow.start_run(run_name="eval", experiment_id=EXPERIMENT_ID):
    # ----- Validation split: the threshold is fitted on normal samples -----
    val_err_n, val_x_n, val_y_n = reconstruction_errors(MODEL, dl_val_n)
    metrics_val = basic_metrics(val_x_n, val_y_n)
    thr, thr_label = pick_threshold(val_err_n)

    # NOTE(review): dl_val_a is only tested for truthiness; presumably it is
    # None or an empty loader when no anomalous samples exist — confirm where
    # the loaders are built.
    if dl_val_a:
        val_err_a, val_x_a, val_y_a = reconstruction_errors(MODEL, dl_val_a)
    else:
        val_err_a = np.zeros((0,), dtype=np.float32)
    y_true_val, y_score_val = _labels_and_scores(val_err_n, val_err_a)

    y_pred_val = (y_score_val > thr).astype(int)
    cm_val = confusion_matrix(y_true_val, y_pred_val, labels=_CLASS_IDS)
    tn_val, fp_val, fn_val, tp_val = cm_val.ravel()

    metrics_clf_val = _detection_metrics(y_true_val, y_pred_val, y_score_val, cm_val)
    specificity_val = metrics_clf_val["specificity"]

    mlflow.log_param("threshold_value", thr)
    mlflow.log_param("threshold_strategy", thr_label)
    mlflow.log_metrics({f"val_{k}": float(v) for k, v in {**metrics_val, **metrics_clf_val}.items()})

    log_confusion_artifacts(cm_val, ["Normal", "An√≥malo"], "val")
    report_val, report_val_path = _write_classification_report(y_true_val, y_pred_val, "val")
    mlflow.log_artifact(str(report_val_path), artifact_path="eval_val")

    # ----- Test split: reuses the threshold chosen on validation -----
    test_err_n, test_x_n, test_y_n = reconstruction_errors(MODEL, dl_test_n)
    metrics_test = basic_metrics(test_x_n, test_y_n)

    if dl_test_a:
        test_err_a, test_x_a, test_y_a = reconstruction_errors(MODEL, dl_test_a)
    else:
        # Keep the name defined in both branches, mirroring the validation split.
        test_err_a = np.zeros((0,), dtype=np.float32)
    y_true_test, y_score_test = _labels_and_scores(test_err_n, test_err_a)

    y_pred_test = (y_score_test > thr).astype(int)
    cm_test = confusion_matrix(y_true_test, y_pred_test, labels=_CLASS_IDS)
    tn_test, fp_test, fn_test, tp_test = cm_test.ravel()

    metrics_clf_test = _detection_metrics(y_true_test, y_pred_test, y_score_test, cm_test)

    mlflow.log_metrics({f"test_{k}": float(v) for k, v in {**metrics_test, **metrics_clf_test}.items()})

    log_confusion_artifacts(cm_test, ["Normal", "An√≥malo"], "test")
    report_test, report_test_path = _write_classification_report(y_true_test, y_pred_test, "test")
    mlflow.log_artifact(str(report_test_path), artifact_path="eval_test")

    # ----- Reconstruction examples: first normal test sample, one plot per lead -----
    if test_x_n.shape[0] > 0:
        leads = ["II", "V1", "V5"]
        t_axis = np.arange(test_x_n.shape[2]) / _SAMPLING_RATE_HZ
        for i, lead in enumerate(leads):
            fig, ax = plt.subplots(figsize=(10, 3))
            ax.plot(t_axis, test_x_n[0, i], label="input")
            ax.plot(t_axis, test_y_n[0, i], label="recon", alpha=0.85)
            ax.set_title(f"Reconstrucci√≥n Test ‚Äî {lead}")
            ax.set_xlabel("Tiempo (s)")
            ax.legend()
            fig.tight_layout()
            img_path = OUTPUT_ROOT / f"recon_{lead}_test.png"
            fig.savefig(img_path, dpi=130)
            plt.close(fig)
            mlflow.log_artifact(str(img_path), artifact_path="recon_examples")

print("Evaluaci√≥n completa. Revisa MLflow para artefactos y m√©tricas.")



Evaluaci√≥n completa. Revisa MLflow para artefactos y m√©tricas.


## ✅ Checklist final

- Ajusta cualquier hiperparámetro en la celda de **Config** o edita `../config/ae1d_config.json`.
- Tras instalar PyTorch cu128 por primera vez, **reinicia el kernel** para que tome las DLL.
- Si el entrenamiento se ejecuta en CPU, revisa drivers CUDA / reinicia y vuelve a correr la celda de setup.
- Los artefactos (checkpoints, curvas, reportes) quedan en `OUTPUT_ROOT` y en MLflow (`../mlflow_artifacts`).
- Para ejecutar desde terminal: `prefect deployment run train_autoencoder_1d/train_flow` (opcional).
