# 🫀 1D CNN Autoencoder (PTB-XL 500 Hz II/V1/V5) + MLflow + Prefect

Este notebook entrena y evalúa un **Autoencoder 1D CNN** sobre las derivaciones **II, V1 y V5** de PTB-XL (500 Hz), con:
- **MLflow** para registrar *experimentos*, *parámetros*, *métricas* y *artefactos*.
- **Prefect** para orquestar el *pipeline* (tareas / procesos).
- Métricas de reconstrucción: **EMA (MAE)**, **EMC (MSE)**, **RMSE** y **R²**.
- Matriz de confusión sobre detección por **umbral de error de reconstrucción** (*anómalo si error > umbral*), con **TP/FP/TN/FN**.

> **Nota:** Ajusta los parámetros en la sección **Config**. Concretamente: rutas de datos, *batch size*, *learning rate*, *épocas*, *umbral*, etc.

## 🧰 Celda 0 — Dependencias (instalación rápida)

- Instala **mlflow** y **prefect** si no estuvieran presentes.
- También instala `scikit-learn` (para métricas) y `matplotlib`.

In [1]:
# ========================================
# üß∞ CELDA 0 ‚Äî INSTALACIONES
# ========================================
# Ejecuta una sola vez (o cuando falte algo). Si ya lo tienes, puedes omitirla.

import sys
import os
import subprocess

# Show which interpreter the kernel is running; pip_install later installs into this same interpreter.
print(f"Kernel Python: {sys.executable}")

# Directories successfully registered with os.add_dll_directory in this session
# (check_torch_build lists them again if importing torch fails with an OSError).
_added_dll_dirs: list[str] = []

# The whole discovery below only runs when the interpreter exposes os.add_dll_directory
# (per the Python docs this function is Windows-only).
if hasattr(os, "add_dll_directory"):
    # Environment variables that may hold a CUDA toolkit root.
    _cuda_env_keys = [
        "CUDA_PATH",
        "CUDA_PATH_V12_8",
        "CUDA_PATH_V12_9",
        "CUDA_PATH_V13_0",
    ]
    # Candidate directories in priority order; duplicates are removed further below.
    dll_candidates: list[str] = []
    for key in _cuda_env_keys:
        base = os.environ.get(key)
        if base:
            dll_candidates.extend([
                os.path.join(base, "bin"),
                os.path.join(base, "libnvvp"),
            ])
    cudnn_root = os.environ.get("CUDNN_PATH") or os.environ.get("CUDNN_ROOT")
    if cudnn_root:
        dll_candidates.append(os.path.join(cudnn_root, "bin"))
    # Some installations leave CUDA in the standard directory even when the environment variable is not set
    default_cuda_root = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
    default_cuda = os.path.join(default_cuda_root, "v12.8")
    if os.path.isdir(default_cuda_root) and not os.path.isdir(default_cuda):
        # v12.8 is absent: add every other version found there (for example v12.9)
        for entry in sorted(os.listdir(default_cuda_root)):
            candidate = os.path.join(default_cuda_root, entry)
            if os.path.isdir(candidate):
                dll_candidates.extend([
                    os.path.join(candidate, "bin"),
                    os.path.join(candidate, "libnvvp"),
                ])
    if os.path.isdir(default_cuda):
        dll_candidates.extend([
            os.path.join(default_cuda, "bin"),
            os.path.join(default_cuda, "libnvvp"),
        ])

    # cuDNN in its typical install locations
    cudnn_search_roots = [
        r"C:\Program Files\NVIDIA\CUDNN",
        r"C:\Program Files\NVIDIA\CUDNN\v9.1",
        r"C:\tools\cuda",
    ]
    for root in cudnn_search_roots:
        if os.path.isdir(root):
            for entry in os.listdir(root):
                # An entry whose name ends in "bin" is used as-is; otherwise its "bin" subfolder is probed.
                candidate = os.path.join(root, entry, "bin") if not entry.lower().endswith("bin") else os.path.join(root, entry)
                if os.path.isdir(candidate):
                    dll_candidates.append(candidate)
            # in case the root itself has a bin directory directly inside it
            bin_dir = os.path.join(root, "bin")
            if os.path.isdir(bin_dir):
                dll_candidates.append(bin_dir)

    # Explicitly add PyTorch's lib folder, located relative to the interpreter's directory
    torch_lib_dir = os.path.join(os.path.dirname(sys.executable), "Lib", "site-packages", "torch", "lib")
    dll_candidates.append(torch_lib_dir)

    # Order-preserving de-duplication of the candidates.
    unique_candidates = []
    seen = set()
    for path in dll_candidates:
        if path and path not in seen:
            unique_candidates.append(path)
            seen.add(path)

    # Register every candidate that exists on disk; a FileNotFoundError from the call is ignored.
    for path in unique_candidates:
        if os.path.isdir(path):
            try:
                os.add_dll_directory(path)
                _added_dll_dirs.append(path)
            except FileNotFoundError:
                pass
    if _added_dll_dirs:
        # Also prepend the registered directories to PATH.
        os.environ["PATH"] = os.pathsep.join(_added_dll_dirs + [os.environ.get("PATH", "")])
        print("DLL dirs a√±adidos:", "; ".join(_added_dll_dirs))


def pip_install(pkg: str) -> None:
    """Install *pkg* with pip unless its module can already be imported.

    *pkg* is a pip requirement string (e.g. ``"prefect>=3"`` or
    ``"scikit-learn"``). The importable module name is derived from it by
    dropping any version specifier, extras bracket or environment marker and
    then translating the distribution name to its module name. The full,
    unmodified requirement string is what gets passed to pip.

    Raises:
        subprocess.CalledProcessError: if the pip invocation fails.
    """
    import re  # local import so the function does not rely on a file-level import

    # Distributions whose importable module is not just the name with "-" -> "_".
    import_aliases = {"scikit-learn": "sklearn"}

    # Cut at the first version operator (<, >, =, !, ~), extras bracket, marker or space.
    # Splitting only on "==" left names such as "prefect>=3" intact, so the import
    # always failed and the package was reinstalled on every run.
    dist_name = re.split(r"[<>=!~\[;\s]", pkg.strip(), maxsplit=1)[0]
    module_name = import_aliases.get(dist_name.lower(), dist_name.replace("-", "_"))

    try:
        __import__(module_name)
        print(f"‚úî {pkg} ya instalado")
    except Exception:
        # Deliberately broad: any failure to import is treated as "needs (re)install".
        print(f"‚è≥ Instalando {pkg} ...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])


# Packages this notebook needs, written as pip requirement strings;
# each one is handed unchanged to pip_install.
BASE_PACKAGES = [
    "mlflow",
    "prefect>=3",
    "matplotlib",
    "scikit-learn",
    "pandas",
    "numpy",
]

# Check (and install if needed) one requirement at a time.
for pkg in BASE_PACKAGES:
    pip_install(pkg)


def check_torch_build() -> None:
    """Print a diagnostic report about the installed PyTorch build and GPU.

    Nothing is returned and nothing is raised for the handled cases: a missing
    PyTorch (ImportError) and a failing native-library load (OSError) are both
    reported with remediation hints. When the import works, the function
    prints the torch / CUDA versions, warns when the build does not look like
    a CUDA 12.8 one, and finally reports the first CUDA device, if any.
    """
    nightly_cmd = "pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128"

    try:
        import torch
    except ImportError:
        print("‚ö†Ô∏è PyTorch no est√° instalado.")
        print("   Para RTX 5080 instala el nightly cu128:")
        print(f"   {nightly_cmd}")
        return
    except OSError as err:
        print("‚ö†Ô∏è No se pudo inicializar PyTorch (DLL error).")
        if _added_dll_dirs:
            print("   Rutas DLL a√±adidas en esta sesi√≥n:")
            for dll_dir in _added_dll_dirs:
                print("    -", dll_dir)
        hints = (
            "   A√±ad√≠ rutas CUDA conocidas via os.add_dll_directory, pero si persiste:",
            "   1) Revisa que CUDA 12.8 est√© instalado en 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8'.",
            "   2) Aseg√∫rate de tener el Microsoft Visual C++ 2022 Redistributable (x64) instalado.",
            "   3) Reinicia el kernel tras instalar/ajustar drivers.",
        )
        for hint in hints:
            print(hint)
        print("   Error original:", err)
        return

    version = getattr(torch, "__version__", "desconocida")
    version_ns = getattr(torch, "version", object())
    cuda_tag = getattr(version_ns, "cuda", "desconocida")
    print(f"torch={version} | torch.version.cuda={cuda_tag}")

    # Either the version string carries the cu128 tag or torch.version.cuda starts with 12.8.
    looks_like_cu128 = "cu128" in version or str(cuda_tag).startswith("12.8")
    if not looks_like_cu128:
        print("‚ö†Ô∏è Esta build no es CUDA 12.8 nightly. Para la RTX 5080 usa el comando:")
        print(f"   {nightly_cmd}")

    if not torch.cuda.is_available():
        print("‚ö†Ô∏è torch.cuda.is_available() -> False. Revisa drivers / reinicia kernel tras instalar el nightly cu128.")
        return

    try:
        gpu = torch.cuda.get_device_properties(0)
        arch = f"sm_{gpu.major}{gpu.minor}"
        print(f"GPU detectada: {gpu.name} ({arch})")
        if gpu.major < 12:
            print("‚ö†Ô∏è La GPU detectada no es Blackwell (sm_120). Ajusta TORCH_CUDA_ARCH_LIST seg√∫n tu hardware.")
    except Exception as err:
        print("‚ö†Ô∏è Error al consultar la GPU:", err)
        print("   Tras reinstalar PyTorch nightly, reinicia el kernel del notebook.")


# Run the PyTorch / GPU diagnostics once, after the base packages were checked.
check_torch_build()
print("\nListo. Si te pide reiniciar el kernel, hazlo y contin√∫a.")

Kernel Python: c:\Python311\python.exe
DLL dirs a√±adidos: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin; C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\libnvvp; c:\Python311\Lib\site-packages\torch\lib
‚úî mlflow ya instalado
‚è≥ Instalando prefect>=3 ...
‚úî matplotlib ya instalado
‚è≥ Instalando scikit-learn ...
‚úî pandas ya instalado
‚úî numpy ya instalado
‚ö†Ô∏è No se pudo inicializar PyTorch (DLL error).
   Rutas DLL a√±adidas en esta sesi√≥n:
    - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin
    - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\libnvvp
    - c:\Python311\Lib\site-packages\torch\lib
   A√±ad√≠ rutas CUDA conocidas via os.add_dll_directory, pero si persiste:
   1) Revisa que CUDA 12.8 est√© instalado en 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8'.
   2) Aseg√∫rate de tener el Microsoft Visual C++ 2022 Redistributable (x64) instalado.
   3) Reinicia el kernel tras instalar/ajustar drivers.
   Error 

## ⚙️ Celda 1 — Imports y **Config** (ajusta aquí)

- Ajusta `OUTPUT_ROOT` para apuntar a la carpeta creada en la extracción (**X_norm_mm.dat**, etc.).  
- Ajusta **hiperparámetros de entrenamiento** (`EPOCHS`, `BATCH_SIZE`, `LR`, etc.).  
- Ajusta **arquitectura** del autoencoder en `MODEL_CFG` (nº filtros, *kernel sizes*, *act.*).
- Ajusta **MLflow** (`MLFLOW_TRACKING_URI`, `EXPERIMENT_NAME`). Por defecto usa carpeta local `./mlruns`.
- Define la **estrategia de umbral** para detección (por percentil o μ+K·σ).

In [2]:
# ========================================
# ‚öôÔ∏è CELDA 1 ‚Äî IMPORTS + CONFIGURACI√ìN
# ========================================

from pathlib import Path
import os, json, math, time, random
import numpy as np
import pandas as pd
from typing import Tuple, Dict

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import mlflow
from mlflow import pytorch as mlflow_pytorch

from prefect import task, flow

import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, confusion_matrix, classification_report

# ---------- DATA PATHS ----------
# Every data file is resolved relative to OUTPUT_ROOT.
OUTPUT_ROOT = Path("../data/ptbxl_500hz_iv1v5")
X_NORM_MM_DAT = OUTPUT_ROOT / "X_norm_mm.dat"
X_NORM_RAW_DAT = OUTPUT_ROOT / "X_norm_raw.dat"
X_NORM_FILT_DAT = OUTPUT_ROOT / "X_norm_filt.dat"
X_ANOM_MM_NPY = OUTPUT_ROOT / "X_anom_mm.npy"
DIMS_JSON     = OUTPUT_ROOT / "dims.json"
# Index files for the train/val/test splits of normal and anomalous recordings.
SPLITS_DIR = OUTPUT_ROOT / "splits"
IDX_NORM_TRAIN = SPLITS_DIR / "idx_norm_train.npy"
IDX_NORM_VAL   = SPLITS_DIR / "idx_norm_val.npy"
IDX_NORM_TEST  = SPLITS_DIR / "idx_norm_test.npy"
IDX_ANOM_TRAIN = SPLITS_DIR / "idx_anom_train.npy"
IDX_ANOM_VAL   = SPLITS_DIR / "idx_anom_val.npy"
IDX_ANOM_TEST  = SPLITS_DIR / "idx_anom_test.npy"

# ---------- HYPERPARAMETERS (tune these) ----------
SEED       = 42
DEVICE     = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS     = 25
BATCH_SIZE = 64
LR         = 3e-4
WEIGHT_DECAY = 1e-5

# ---------- AUTOENCODER ARCHITECTURE (tune this) ----------
# "kernels" must hold exactly four sizes: AE1DCNN unpacks them as k1..k4.
MODEL_CFG = dict(
    in_channels=3,
    base_filters=32,
    leak=0.1,
    kernels=[11, 7, 9, 11],
)

# Only "mse" is implemented by loss_fn; any other value raises NotImplementedError there.
LOSS_FN = "mse"
# When not None, train_epoch passes this value to clip_grad_norm_ as the maximum norm.
CLIP_GRAD = None

# ---------- MLFLOW ----------
MLFLOW_TRACKING_URI = "file:./mlruns"
EXPERIMENT_NAME     = "ptbxl_ae_1dcnn_iv1v5"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

# ---------- DETECTION THRESHOLD ----------
# pick_threshold uses the percentile unless USE_MEAN_STD is True, in which case it uses mean + K_STD * std.
THRESHOLD_BY_PERCENTILE = 98.0
USE_MEAN_STD = False
K_STD = 3.0

def set_seed(seed=SEED):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices) with *seed*."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

def open_memmap_known_TC(path: Path, T: int, C: int, mode="r", dtype=np.float32):
    """Open a headerless binary file as an ``(N, T, C)`` ``np.memmap``.

    ``N`` is inferred from the file size, so the file must contain a whole
    number of ``T * C`` blocks of ``dtype`` items.

    Args:
        path: Location of the raw file.
        T: Length of the second axis (must be positive).
        C: Length of the third axis (must be positive).
        mode: Mode forwarded to ``np.memmap``.
        dtype: Element type of the file; its item size drives the size checks.

    Returns:
        An ``np.memmap`` of shape ``(N, T, C)``.

    Raises:
        FileNotFoundError: if *path* does not exist (raised explicitly rather
            than asserted, so the check also runs under ``python -O``).
        ValueError: if *T* or *C* is not positive.
        RuntimeError: if the file size does not match ``dtype`` / ``T * C``.
    """
    if not path.exists():
        raise FileNotFoundError(f"No encuentro {path}")
    if T <= 0 or C <= 0:
        raise ValueError(f"T y C deben ser positivos (T={T}, C={C})")

    # Use the real item size of `dtype`; a hard-coded 4 was only right for float32.
    item_dtype = np.dtype(dtype)
    itemsize = item_dtype.itemsize
    bytes_total = os.path.getsize(path)
    if bytes_total % itemsize != 0:
        raise RuntimeError(f"{path} no es m√∫ltiplo de {itemsize} bytes ({item_dtype.name}).")

    n_items = bytes_total // itemsize
    block = T * C
    if n_items % block != 0:
        raise RuntimeError(f"Tama√±o inconsistente: {n_items} elementos % (T*C={block}) != 0")

    N = n_items // block
    return np.memmap(path, dtype=dtype, mode=mode, shape=(N, T, C))

# Fix the RNG seeds before anything else in this cell touches randomness.
set_seed(SEED)

# dims.json provides the per-recording dimensions T and C used to shape the memmap.
with open(DIMS_JSON) as f:
    dims = json.load(f)
T = int(dims["T"])
C = int(dims["C"])

# The rest of the notebook is written for exactly three leads.
assert C == 3, f"Esperaba 3 derivaciones, C={C}"
print(f"Dispositivo: {DEVICE} | T={T}, C={C}")

OSError: [WinError 1114] Error en una rutina de inicializaci√≥n de biblioteca de v√≠nculos din√°micos (DLL). Error loading "c:\Python311\Lib\site-packages\torch\lib\c10.dll" or one of its dependencies.

In [3]:
# Minimal standalone check: does PyTorch import, which version is it, and is CUDA visible?
import torch
print(torch.__version__)
print(torch.cuda.is_available())

OSError: [WinError 1114] Error en una rutina de inicializaci√≥n de biblioteca de v√≠nculos din√°micos (DLL). Error loading "c:\Python311\Lib\site-packages\torch\lib\c10.dll" or one of its dependencies.

## 🧩 Celda 1b — Configuración fácil (1 solo lugar)

- Cambia parámetros aquí o crea un archivo `../config/ae1d_config.json` para sobreescribir.
- Se imprime un resumen claro de lo aplicado.
- Todo queda registrado en MLflow vía `params.json` (ya implementado).


In [None]:
# ========================================
# ‚öôÔ∏è CELDA 1b ‚Äî CONFIG F√ÅCIL (dict + JSON override)
# ========================================
from copy import deepcopy

# Optional JSON file whose values override DEFAULT_CONFIG (merged with deep_update below).
CONFIG_PATH = Path("../config/ae1d_config.json")

# Built-in defaults; the sections correspond to the groups of globals that apply_config assigns.
DEFAULT_CONFIG = {
    "data": {
        "output_root": str((Path("../data/ptbxl_500hz_iv1v5")).resolve()),
        "dims": {"T": None, "C": 3},  # T is taken from dims.json
    },
    "training": {
        "seed": 42,
        "device": "auto",  # "auto" -> cuda when available, otherwise cpu
        "epochs": 25,
        "batch_size": 64,
        "lr": 3e-4,
        "weight_decay": 1e-5,
        "clip_grad": None,
        "loss_fn": "mse",
    },
    "model": {
        "in_channels": 3,
        "base_filters": 32,
        "leak": 0.1,
        "kernels": [11, 7, 9, 11],
    },
    "threshold": {
        "use_mean_std": False,
        "k_std": 3.0,
        "percentile": 98.0,
    },
    "mlflow": {
        "experiment_name": "ptbxl_ae_1dcnn_iv1v5",
        "tracking_uri": None,  # uses the one set further below (sqlite DB in the parent directory)
    },
}

def deep_update(base: dict, upd: dict) -> dict:
    """Return a deep copy of *base* with *upd* merged in recursively.

    Where both sides hold a dict under the same key the two are merged;
    otherwise the value from *upd* replaces the one from *base*. *base* itself
    is never modified. A falsy *upd* (``None``, ``{}``) yields a plain copy.
    """
    merged = deepcopy(base)
    if not upd:
        return merged
    for key, incoming in upd.items():
        existing = merged.get(key)
        if isinstance(incoming, dict) and isinstance(existing, dict):
            merged[key] = deep_update(existing, incoming)
            continue
        merged[key] = incoming
    return merged


def load_user_config(path: Path | None) -> dict | None:
    if path and path.exists():
        try:
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            print("‚ö†Ô∏è No se pudo leer config JSON:", e)
    return None


def apply_config(cfg: dict):
    """Publish the values of *cfg* into the notebook's module-level settings.

    *cfg* must have the sections ``data``, ``training``, ``model``,
    ``threshold`` and ``mlflow`` with the keys read below (the layout of
    ``DEFAULT_CONFIG``). Every path derived from the data root is recomputed,
    including the split-index files, so that data and indices always come
    from the same root. The MLflow tracking URI in *cfg* is not applied here;
    only the experiment name is.

    Raises:
        KeyError: if a required section or key is missing.
        ValueError / TypeError: if a value cannot be converted to its type.
    """
    global OUTPUT_ROOT, X_NORM_MM_DAT, X_NORM_RAW_DAT, X_NORM_FILT_DAT, X_ANOM_MM_NPY, DIMS_JSON, SPLITS_DIR
    global IDX_NORM_TRAIN, IDX_NORM_VAL, IDX_NORM_TEST, IDX_ANOM_TRAIN, IDX_ANOM_VAL, IDX_ANOM_TEST
    global SEED, DEVICE, EPOCHS, BATCH_SIZE, LR, WEIGHT_DECAY, CLIP_GRAD, LOSS_FN
    global MODEL_CFG
    global USE_MEAN_STD, K_STD, THRESHOLD_BY_PERCENTILE
    global EXPERIMENT_NAME

    # Data
    OUTPUT_ROOT = Path(cfg["data"]["output_root"]).resolve()
    X_NORM_MM_DAT = OUTPUT_ROOT / "X_norm_mm.dat"
    X_NORM_RAW_DAT = OUTPUT_ROOT / "X_norm_raw.dat"
    X_NORM_FILT_DAT = OUTPUT_ROOT / "X_norm_filt.dat"
    X_ANOM_MM_NPY = OUTPUT_ROOT / "X_anom_mm.npy"
    DIMS_JSON = OUTPUT_ROOT / "dims.json"
    SPLITS_DIR = OUTPUT_ROOT / "splits"
    # The split-index paths hang off SPLITS_DIR, so they must follow it when the
    # root changes; otherwise indices from the old root would be paired with
    # data from the new one.
    IDX_NORM_TRAIN = SPLITS_DIR / "idx_norm_train.npy"
    IDX_NORM_VAL = SPLITS_DIR / "idx_norm_val.npy"
    IDX_NORM_TEST = SPLITS_DIR / "idx_norm_test.npy"
    IDX_ANOM_TRAIN = SPLITS_DIR / "idx_anom_train.npy"
    IDX_ANOM_VAL = SPLITS_DIR / "idx_anom_val.npy"
    IDX_ANOM_TEST = SPLITS_DIR / "idx_anom_test.npy"

    # Training
    training = cfg["training"]
    SEED = int(training["seed"])
    if training["device"] == "auto":
        DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        DEVICE = str(training["device"]).lower()
    EPOCHS = int(training["epochs"])
    BATCH_SIZE = int(training["batch_size"])
    LR = float(training["lr"])
    WEIGHT_DECAY = float(training["weight_decay"])
    # None disables clipping; anything else is normalised to a float max-norm.
    clip = training["clip_grad"]
    CLIP_GRAD = None if clip is None else float(clip)
    LOSS_FN = str(training["loss_fn"]).lower()

    # Model
    model_cfg = cfg["model"]
    MODEL_CFG = {
        "in_channels": int(model_cfg["in_channels"]),
        "base_filters": int(model_cfg["base_filters"]),
        "leak": float(model_cfg["leak"]),
        "kernels": list(model_cfg["kernels"]),
    }

    # Threshold
    threshold = cfg["threshold"]
    USE_MEAN_STD = bool(threshold["use_mean_std"])
    K_STD = float(threshold["k_std"])
    THRESHOLD_BY_PERCENTILE = float(threshold["percentile"])

    # MLflow (name only, in case the local mlruns store is used)
    EXPERIMENT_NAME = str(cfg["mlflow"]["experiment_name"])


# ==== load + apply ====
# Optional user overrides from CONFIG_PATH (None when the file is absent or unreadable).
_user_cfg = load_user_config(CONFIG_PATH)
# Defaults with the user's values layered on top.
ACTIVE_CONFIG = deep_update(DEFAULT_CONFIG, _user_cfg or {})
# Publish the merged values into the module-level settings.
apply_config(ACTIVE_CONFIG)

# Summary of what is now in effect.
print("Config aplicada:")
print("- output_root:", OUTPUT_ROOT)
print("- device:", DEVICE, "| epochs:", EPOCHS, "| batch_size:", BATCH_SIZE, "| lr:", LR)
print("- model:", MODEL_CFG)
print("- threshold:", {"use_mean_std": USE_MEAN_STD, "k_std": K_STD, "percentile": THRESHOLD_BY_PERCENTILE})


In [None]:
# (Optional) Write a config template when no config file exists yet
CFG_DIR = CONFIG_PATH.parent
CFG_DIR.mkdir(parents=True, exist_ok=True)
if CONFIG_PATH.exists():
    # An existing file is never overwritten.
    print("Usando config en:", CONFIG_PATH)
else:
    with open(CONFIG_PATH, "w", encoding="utf-8") as f:
        json.dump(DEFAULT_CONFIG, f, ensure_ascii=False, indent=2)
    print("Plantilla creada en:", CONFIG_PATH)


## 🧾 Celda 2 — Dataset y DataLoaders

- `MemmapDataset` abre el `.dat` **sin cargar todo a RAM** y devuelve tensores `[C, T]` para `Conv1d` de PyTorch.  
- Entrenamos con **normales**; validación y test combinan normales y anómalos para evaluación de reconstrucción/anomalía.
- Cambia tamaños de *batch* en `BATCH_SIZE`.

In [None]:
# ========================================
# üßæ CELDA 2 ‚Äî DATASET + DATALOADERS
# ========================================

class MemmapDataset(Dataset):
    """Dataset view over an array of recordings, restricted to chosen rows.

    ``x_memmap`` is indexed by recording; each recording is a 2-D array that
    is transposed on access (``[T, C]`` -> ``[C, T]``). ``indices`` selects
    and orders the recordings this dataset exposes.
    """

    def __init__(self, x_memmap, indices):
        self.x = x_memmap
        # Private int64 copy of the selection.
        self.indices = np.array(indices, dtype=np.int64)

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, i):
        row = int(self.indices[i])
        sample = self.x[row]  # [T, C]
        # Swap the two axes and copy, so the result is a fresh, writable [C, T] array.
        channels_first = sample.transpose(1, 0).copy()
        return torch.from_numpy(channels_first).float()

# Normal recordings: raw (N, T, C) file opened read-only as a memmap.
X_norm_mm = open_memmap_known_TC(X_NORM_MM_DAT, T, C, mode="r")
# Anomalous recordings: loaded with np.load; an empty (0, T, C) array stands in when the file is missing.
X_anom_mm = np.load(X_ANOM_MM_NPY) if X_ANOM_MM_NPY.exists() else np.zeros((0,T,C),dtype=np.float32)

# Split indices. The normal splits are required; the anomalous ones are optional.
idx_norm_train = np.load(IDX_NORM_TRAIN)
idx_norm_val   = np.load(IDX_NORM_VAL)
idx_norm_test  = np.load(IDX_NORM_TEST)
idx_anom_val   = np.load(IDX_ANOM_VAL) if IDX_ANOM_VAL.exists() else np.zeros((0,),dtype=np.int64)
idx_anom_test  = np.load(IDX_ANOM_TEST) if IDX_ANOM_TEST.exists() else np.zeros((0,),dtype=np.int64)

ds_train = MemmapDataset(X_norm_mm, idx_norm_train)
ds_val_n = MemmapDataset(X_norm_mm, idx_norm_val)
ds_test_n= MemmapDataset(X_norm_mm, idx_norm_test)

# MemmapDataset only does `x[idx]`, a transpose and a copy, so it works unchanged on an
# in-memory array. The name NpyDataset is kept as an alias instead of a duplicated class.
NpyDataset = MemmapDataset

if len(X_anom_mm):
    ds_val_a  = NpyDataset(X_anom_mm, idx_anom_val) if len(idx_anom_val)>0 else None
    ds_test_a = NpyDataset(X_anom_mm, idx_anom_test) if len(idx_anom_test)>0 else None
else:
    ds_val_a = ds_test_a = None

# Only the training loader shuffles; evaluation loaders keep the split order.
dl_train = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, drop_last=False)
dl_val_n = DataLoader(ds_val_n, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
dl_test_n= DataLoader(ds_test_n, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
# Explicit None checks instead of dataset truthiness (which goes through __len__).
dl_val_a  = DataLoader(ds_val_a,  batch_size=BATCH_SIZE, shuffle=False, num_workers=0) if ds_val_a is not None else None
dl_test_a = DataLoader(ds_test_a, batch_size=BATCH_SIZE, shuffle=False, num_workers=0) if ds_test_a is not None else None

print("Train (norm):", len(ds_train), "| Val norm:", len(ds_val_n), "| Test norm:", len(ds_test_n))
print("Val anom:", 0 if dl_val_a is None else len(ds_val_a), "| Test anom:", 0 if dl_test_a is None else len(ds_test_a))

## 🧠 Celda 3 — Modelo Autoencoder 1D CNN

Arquitectura **encoder–decoder** inspirada en la tabla provista. Ajustable vía `MODEL_CFG`:

- **Encoder**: Conv1d + LeakyReLU + MaxPool (reduce temporal).  
- **Decoder**: Upsample (recupera longitud) + Conv1d + activaciones.  
- **Salida**: `sigmoid` (supone entrada min–max ∈ [0,1]).  
- **Cropping**: si la longitud no calza (por *pool/upsample*), se recorta al tamaño original.

In [None]:
# ========================================
# üß† CELDA 3 ‚Äî DEFINICI√ìN DEL MODELO
# ========================================

class Cropping1D(nn.Module):
    """Force the last axis of a tensor to a fixed length.

    Longer inputs are centre-cropped, shorter ones are reflect-padded on both
    sides (the extra element, if any, goes to the right), and inputs that
    already have ``target_len`` are returned untouched.
    """

    def __init__(self, target_len: int):
        super().__init__()
        self.target_len = target_len

    def forward(self, x):
        surplus = x.shape[-1] - self.target_len
        if surplus == 0:
            return x
        if surplus > 0:
            offset = surplus // 2
            return x[..., offset:offset + self.target_len]
        missing = -surplus
        pad_left = missing // 2
        pad_right = missing - pad_left
        return nn.functional.pad(x, (pad_left, pad_right), mode="reflect")

class AE1DCNN(nn.Module):
    """1-D convolutional autoencoder built from ``cfg``.

    ``cfg`` supplies ``in_channels``, ``base_filters``, ``leak`` (LeakyReLU
    slope) and ``kernels`` (exactly four kernel sizes). The encoder applies
    four convolutions with two stride-2 max-pools; the decoder upsamples twice
    by a factor of 2, applies three convolutions and ends in a sigmoid. The
    output is finally cropped or padded to ``in_len`` by ``Cropping1D``.
    """

    def __init__(self, cfg: Dict, in_len: int):
        super().__init__()
        n_in = cfg["in_channels"]
        width = cfg["base_filters"]
        slope = cfg["leak"]
        k_first, k_second, k_third, k_fourth = cfg["kernels"]

        # A single activation module is shared by every activation position.
        relu = nn.LeakyReLU(slope, inplace=True)

        def same_conv(c_in, c_out, k):
            # padding = k // 2, as used for every convolution of the network
            return nn.Conv1d(c_in, c_out, kernel_size=k, padding=k // 2)

        def halve():
            return nn.MaxPool1d(kernel_size=2, stride=2)

        def double():
            return nn.Upsample(scale_factor=2, mode="nearest")

        # Layers are created encoder-first, in network order, so the order of
        # parameter initialisation and the Sequential indices stay fixed.
        encoder_layers = [
            same_conv(n_in, width * 2, k_first), relu,
            halve(),
            same_conv(width * 2, width, k_second), relu,
            halve(),
            same_conv(width, width, k_third), relu,
            same_conv(width, width, k_fourth), relu,
        ]
        decoder_layers = [
            double(),
            same_conv(width, width * 2, k_third), relu,
            double(),
            same_conv(width * 2, width, k_second), relu,
            same_conv(width, n_in, k_first),
            nn.Sigmoid(),
        ]
        self.enc = nn.Sequential(*encoder_layers)
        self.dec = nn.Sequential(*decoder_layers)
        self.crop = Cropping1D(in_len)

    def forward(self, x):
        latent = self.enc(x)
        recon = self.dec(latent)
        return self.crop(recon)

# Build the autoencoder for recordings of length T and move it to the configured device.
model = AE1DCNN(MODEL_CFG, in_len=T).to(DEVICE)
# Total number of parameter elements, regardless of requires_grad.
n_params = sum(p.numel() for p in model.parameters())
print(model)
print(f"Total params: {n_params/1e6:.3f} M")

In [None]:
# ==== MLflow: use the DB one directory up (../mlflow.db) and a fixed artifact location ====
from pathlib import Path
import mlflow
from mlflow.tracking import MlflowClient

# 1) Build absolute paths into the parent of the current working directory
PARENT_DIR = Path.cwd().parent.resolve()                  # <- ../ (absolute)
TRACKING_DB = (PARENT_DIR / "mlflow.db").resolve()        # ../mlflow.db
ARTIF_ROOT  = (PARENT_DIR / "mlflow_artifacts").resolve() # ../mlflow_artifacts/
ARTIF_ROOT.mkdir(parents=True, exist_ok=True)

# 2) Point the tracking URI at the DB in the parent directory
#    (NOTE: the URI is "sqlite:///" followed by the absolute path in POSIX style, also on Windows).
#    This replaces the tracking URI that was set earlier from MLFLOW_TRACKING_URI.
mlflow.set_tracking_uri(f"sqlite:///{TRACKING_DB.as_posix()}")

# 3) Create or fetch the experiment, with an explicit artifact_location in ../mlflow_artifacts
client = MlflowClient()
EXP_NAME = "ae1d"
exp = client.get_experiment_by_name(EXP_NAME)
if exp is None:
    EXP_ID = client.create_experiment(EXP_NAME, artifact_location=ARTIF_ROOT.as_uri())
else:
    EXP_ID = exp.experiment_id

# 4) If a run was left open by an earlier notebook error, close it
if mlflow.active_run() is not None:
    print("Cerrando run previo:", mlflow.active_run().info.run_id)
    mlflow.end_run()

print(">>> Tracking URI:", mlflow.get_tracking_uri())
print(">>> Experiment ID:", EXP_ID)
print(">>> Artifact root:", ARTIF_ROOT.as_uri())


## 🏋️ Celda 4 — Entrenamiento con **Prefect** + **MLflow**

- `@task` para `train_epoch` y `eval_epoch` (devuelven pérdida media).  
- `@flow` principal: inicia **MLflow run**, loguea **parámetros**, curva de **loss**, mejor **checkpoint**, y artefactos (gráficos).

In [None]:
# ========================================
# üèãÔ∏è CELDA 4 ‚Äî ENTRENAMIENTO con PREFECT + MLFLOW (versi√≥n robusta)
# ========================================

import time, json
from pathlib import Path
import matplotlib
matplotlib.use("Agg")  # backend no interactivo para guardar figuras
import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch as mlflow_pytorch
import torch
import torch.nn as nn
from prefect import task, flow

# --------- p√©rdida ---------
def loss_fn(pred, target):
    """Return the reconstruction loss selected by the global ``LOSS_FN``.

    Only ``"mse"`` is implemented; any other setting raises
    ``NotImplementedError``.
    """
    if LOSS_FN != "mse":
        raise NotImplementedError(f"LOSS_FN no soportada: {LOSS_FN}")
    return nn.functional.mse_loss(pred, target)

# --------- helpers de logging ----------
def count_params(m: torch.nn.Module):
    """Return ``(total, trainable)`` parameter element counts of module *m*."""
    total = 0
    trainable = 0
    for param in m.parameters():
        size = param.numel()
        total += size
        if param.requires_grad:
            trainable += size
    return total, trainable

def _flatten_dict(d, parent_key="", sep="__"):
    """Flatten nested dicts into one level, joining key paths with *sep*.

    Keys are converted with ``str``; a non-empty *parent_key* is used as the
    prefix of every produced key. Non-dict values are kept as they are.
    """
    flat = {}
    for key, value in d.items():
        path = f"{parent_key}{sep}{key}" if parent_key else str(key)
        if isinstance(value, dict):
            flat.update(_flatten_dict(value, path, sep=sep))
        else:
            flat[path] = value
    return flat

def collect_run_params() -> dict:
    """Snapshot the current global settings as a nested dict.

    The result groups the training hyper-parameters, the threshold settings,
    the model configuration (the ``MODEL_CFG`` object itself, not a copy),
    the data paths as strings and the ``T`` / ``C`` dimensions.
    """
    threshold_cfg = {
        "use_mean_std": USE_MEAN_STD,
        "k_std": K_STD,
        "percentile": THRESHOLD_BY_PERCENTILE,
    }
    data_paths = {
        "output_root": str(OUTPUT_ROOT.resolve()),
        "x_norm_mm": str(X_NORM_MM_DAT),
        "x_anom_npy": str(X_ANOM_MM_NPY),
        "dims_json": str(DIMS_JSON),
    }
    return {
        "seed": SEED,
        "device": DEVICE,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "learning_rate": LR,
        "weight_decay": WEIGHT_DECAY,
        "loss_fn": LOSS_FN,
        "threshold": threshold_cfg,
        "model": MODEL_CFG,
        "data": data_paths,
        "dims": {"T": int(T), "C": int(C)},
    }

# --------- tareas Prefect ----------
@task(name="train_epoch", log_prints=True)
def train_epoch(model: nn.Module, loader, optimizer) -> float:
    """Run one optimisation pass over *loader* and return the mean loss.

    Each batch is moved to ``DEVICE`` and reconstructed, and the loss against
    the input itself is back-propagated. Gradients are norm-clipped when
    ``CLIP_GRAD`` is set. The returned mean is weighted by batch size
    (dimension 0 of each batch); an empty loader yields ``0.0``.
    """
    model.train()
    loss_sum = 0.0
    n_seen = 0
    for batch in loader:
        batch = batch.to(DEVICE, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        recon = model(batch)
        batch_loss = loss_fn(recon, batch)  # reconstruction loss: the input is the target
        batch_loss.backward()
        if CLIP_GRAD is not None:
            nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRAD)
        optimizer.step()
        batch_size = batch.size(0)
        loss_sum += batch_loss.item() * batch_size
        n_seen += batch_size
    return loss_sum / max(1, n_seen)

@task(name="eval_epoch", log_prints=True)
def eval_epoch(model: nn.Module, loader) -> float:
    """Return the mean reconstruction loss of *model* over *loader*.

    Runs in eval mode with gradients disabled. The mean is weighted by batch
    size (dimension 0 of each batch); an empty loader yields ``0.0``.
    """
    model.eval()
    loss_sum = 0.0
    n_seen = 0
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(DEVICE, non_blocking=True)
            recon = model(batch)
            batch_size = batch.size(0)
            # reconstruction loss: the input is the target
            loss_sum += loss_fn(recon, batch).item() * batch_size
            n_seen += batch_size
    return loss_sum / max(1, n_seen)

# --------- flow principal ----------
@flow(name="train_autoencoder_1d", log_prints=True)
def train_flow():
    """Train the global ``model`` and record the run in MLflow.

    Steps: seed the RNGs, make sure the model runs on ``DEVICE`` (falling back
    to CPU when a CUDA transfer or forward pass fails), open an MLflow run,
    log the parameters, train for ``EPOCHS`` epochs while checkpointing the
    best validation loss, log the loss curves, then reload the best
    checkpoint and log it as a PyTorch model.

    Returns:
        dict with ``best_ckpt`` and ``curves_png`` (paths as strings),
        ``best_val`` (float) and ``mlflow_run_id``.

    Raises:
        RuntimeError: if no epoch produced a checkpoint (``EPOCHS`` is 0 or
            the validation loss never compared below infinity, e.g. NaN).
            This prevents a stale ``ae_best.pt`` left by an earlier run from
            being loaded and logged as this run's model.
    """
    global DEVICE
    set_seed(SEED)
    OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
    best_path = (OUTPUT_ROOT / "ae_best.pt").resolve()
    curves_path = (OUTPUT_ROOT / "loss_curves.png").resolve()
    params_path = (OUTPUT_ROOT / "params.json").resolve()

    # Safe fallback: if CUDA is reported as available but this build cannot actually
    # run on the GPU, switch to CPU. The transfer itself is inside the try as well,
    # because moving the model can fail just like the probe forward pass.
    if DEVICE.startswith("cuda"):
        try:
            model.to(DEVICE)
            model.eval()
            with torch.no_grad():
                test_len = min(T, 512)
                xb = torch.randn(1, MODEL_CFG["in_channels"], test_len, device=DEVICE)
                _ = model(xb)
        except Exception as e:
            print("‚ö†Ô∏è Problema con CUDA en este dispositivo -> usando CPU. Causa:", e)
            DEVICE = "cpu"
            model.to(DEVICE)
    else:
        model.to(DEVICE)

    with mlflow.start_run(run_name=f"ae1d_{int(time.time())}", experiment_id=EXP_ID) as run:
        run_id = run.info.run_id
        print("RUN:", run_id)
        print("Tracking URI:", mlflow.get_tracking_uri())
        print("Artifact URI:", mlflow.get_artifact_uri())

        # Log the parameters (flattened) and also store them as the params.json artifact
        params = collect_run_params()
        flat_params = _flatten_dict(params)
        # Anything that is not a plain str/int/float/bool is stringified before logging
        mlflow.log_params({k: (v if isinstance(v, (str, int, float, bool)) else str(v)) for k, v in flat_params.items()})
        with open(params_path, "w", encoding="utf-8") as f:
            json.dump(params, f, ensure_ascii=False, indent=2)
        mlflow.log_artifact(str(params_path), artifact_path="config")

        optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
        tr_losses, val_losses = [], []
        best_val = float("inf")
        ckpt_saved = False  # becomes True once this run has written best_path

        for ep in range(1, EPOCHS + 1):
            tr = train_epoch(model, dl_train, optimizer)
            va = eval_epoch(model, dl_val_n)

            tr_losses.append(tr); val_losses.append(va)
            mlflow.log_metrics({"recon_mse_train": float(tr), "recon_mse_val": float(va)}, step=ep)

            print(f"[{ep:03d}/{EPOCHS}] recon_mse_train={tr:.6f} | recon_mse_val={va:.6f}")

            if va < best_val:
                best_val = va
                torch.save(model.state_dict(), best_path)
                assert best_path.exists()
                ckpt_saved = True
                mlflow.log_artifact(str(best_path), artifact_path="checkpoints")

        # loss curves
        plt.figure(figsize=(7, 4))
        plt.plot(tr_losses, label="train MSE")
        plt.plot(val_losses, label="val MSE")
        plt.xlabel("Epoch"); plt.ylabel("Reconstruction MSE")
        plt.legend(); plt.tight_layout()
        plt.savefig(curves_path, dpi=130); plt.close()
        assert curves_path.exists()
        mlflow.log_artifact(str(curves_path), artifact_path="plots")

        # model: only a checkpoint written by THIS run may be reloaded and logged
        if not ckpt_saved:
            raise RuntimeError(
                "Ningun epoch mejoro recon_mse_val (EPOCHS=0 o perdida no finita); "
                "no hay checkpoint de este run para cargar."
            )
        model.load_state_dict(torch.load(best_path, map_location="cpu"))
        model.eval()
        mlflow_pytorch.log_model(model, artifact_path="pytorch_model")

        return {
            "best_ckpt": str(best_path),
            "curves_png": str(curves_path),
            "best_val": float(best_val),
            "mlflow_run_id": run_id,
        }

# ---- ejecuci√≥n sincr√≥nica del flow (local) ----
# Calling the flow function directly runs it here and returns its result dict.
train_artifacts = train_flow()
print("Artifacts:", train_artifacts)


## 📈 Celda 5 — Evaluación completa + Umbral de anomalía

- Calcula **EMA (MAE)**, **EMC (MSE)**, **RMSE**, **R²** sobre **validación** y **test** (solo reconstrucción).
- Fija **umbral** con percentil (o μ+K·σ) usando errores por **registro** en validación **normal**.
- Aplica umbral para clasificar **anómalo** (error > umbral) vs **normal** (error ≤ umbral).
- Construye **matriz de confusión (TP/FP/TN/FN)** y **reporte** en validación y test.
- Loguea todo en **MLflow**.

In [None]:
# ========================================
# üìà CELDA 5 ‚Äî EVALUACI√ìN + MATRIZ DE CONFUSI√ìN
# ========================================

def reconstruct_errors(model, loader):
    """Reconstruct every batch of *loader* and collect errors, inputs and outputs.

    Returns three NumPy arrays concatenated over all batches: the per-sample
    MSE (mean over dimensions 1 and 2 of each batch), the inputs, and the
    reconstructions. Runs in eval mode without gradients; batches are moved
    to ``DEVICE`` for the forward pass and results are brought back to CPU.
    """
    model.eval()
    per_sample_mse = []
    inputs = []
    recons = []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(DEVICE)
            recon = model(batch)
            sq_err = (recon - batch) ** 2
            per_sample_mse.append(sq_err.mean(dim=(1, 2)).detach().cpu().numpy())
            inputs.append(batch.detach().cpu().numpy())
            recons.append(recon.detach().cpu().numpy())
    errs = np.concatenate(per_sample_mse)
    xs = np.concatenate(inputs)
    ys = np.concatenate(recons)
    return errs, xs, ys

def basic_regression_metrics(x_true, x_pred):
    """Compute MAE, MSE, RMSE and R² between two arrays, element by element.

    Both inputs are flattened first, so every element counts equally. Returns
    a dict with the keys ``mae``, ``mse``, ``rmse`` and ``r2``.
    """
    truth = x_true.reshape(-1)
    pred = x_pred.reshape(-1)
    metrics = {"mae": mean_absolute_error(truth, pred)}
    metrics["mse"] = mean_squared_error(truth, pred)
    metrics["rmse"] = math.sqrt(metrics["mse"])
    metrics["r2"] = r2_score(truth, pred)
    return metrics

def pick_threshold(errs_norm_val, *, use_mean_std=None, k_std=None, percentile=None):
    """Derive the anomaly threshold from reconstruction errors of normal data.

    Args:
        errs_norm_val: Array-like of reconstruction errors.
        use_mean_std: Choose ``mean + k * std`` (true) or a percentile (false).
            Defaults to the notebook-wide ``USE_MEAN_STD``.
        k_std: Multiplier of the standard deviation. Defaults to ``K_STD``.
        percentile: Percentile in ``[0, 100]``. Defaults to
            ``THRESHOLD_BY_PERCENTILE``.

    Returns:
        Tuple ``(threshold, how)`` where ``how`` is a short label describing
        the rule that produced the threshold.

    Raises:
        ValueError: If ``errs_norm_val`` is empty. Without this check the
            mean/std rule silently yields NaN, and a NaN threshold marks every
            sample as normal.
    """
    errs = np.asarray(errs_norm_val)
    if errs.size == 0:
        raise ValueError("cannot pick a threshold from an empty error array")

    if use_mean_std is None:
        use_mean_std = USE_MEAN_STD

    # Only the configuration value needed by the selected rule is resolved.
    if use_mean_std:
        k = K_STD if k_std is None else k_std
        mu = float(np.mean(errs))
        sd = float(np.std(errs))  # population standard deviation (NumPy default)
        return mu + k * sd, f"mean+{k}*std"

    pct = THRESHOLD_BY_PERCENTILE if percentile is None else percentile
    return float(np.percentile(errs, pct)), f"p{pct}"

# Reload the best checkpoint so the evaluation uses those weights.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
best_path = (OUTPUT_ROOT / "ae_best.pt").resolve()
model.load_state_dict(torch.load(best_path, map_location=DEVICE))

with mlflow.start_run(run_name="eval", experiment_id=EXP_ID):
    # --- Validation ---
    val_err_n, val_xn, val_yn = reconstruct_errors(model, dl_val_n)
    reg_val = basic_regression_metrics(val_xn, val_yn)
    # The threshold is derived from the normal validation errors only.
    thr, how = pick_threshold(val_err_n)
    if dl_val_a is not None:
        val_err_a, val_xa, val_ya = reconstruct_errors(model, dl_val_a)
        # Ground truth: 0 = normal, 1 = anomalous (integer labels, like y_pred).
        y_true = np.concatenate([np.zeros_like(val_err_n), np.ones_like(val_err_a)]).astype(int)
        y_pred = np.concatenate([val_err_n > thr, val_err_a > thr]).astype(int)
    else:
        y_true = np.zeros_like(val_err_n, dtype=int)
        y_pred = (val_err_n > thr).astype(int)
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # `labels` must be explicit: with two target names, the report raises when
    # only one class is present in y_true/y_pred (e.g. no anomalous loader and
    # no error above the threshold).
    report = classification_report(
        y_true, y_pred, labels=[0, 1], target_names=["normal", "anomalo"],
        digits=4, zero_division=0,
    )
    mlflow.log_metrics({f"val_{k}": v for k, v in reg_val.items()})
    mlflow.log_param("threshold_how", how)
    mlflow.log_param("threshold_value", thr)
    cm_df = pd.DataFrame(cm, index=["Real:Normal", "Real:Anomalo"], columns=["Pred:Normal", "Pred:Anomalo"])
    cm_path = OUTPUT_ROOT / "cm_val.csv"
    cm_df.to_csv(cm_path)
    report_val_path = OUTPUT_ROOT / "report_val.txt"
    with open(report_val_path, "w", encoding="utf-8") as f:
        f.write(report)
    mlflow.log_artifact(str(cm_path), artifact_path="eval_val")
    mlflow.log_artifact(str(report_val_path), artifact_path="eval_val")

    # --- Test (reuses the threshold chosen on validation) ---
    test_err_n, test_xn, test_yn = reconstruct_errors(model, dl_test_n)
    reg_test = basic_regression_metrics(test_xn, test_yn)
    if dl_test_a is not None:
        test_err_a, test_xa, test_ya = reconstruct_errors(model, dl_test_a)
        y_true_t = np.concatenate([np.zeros_like(test_err_n), np.ones_like(test_err_a)]).astype(int)
        y_pred_t = np.concatenate([test_err_n > thr, test_err_a > thr]).astype(int)
    else:
        y_true_t = np.zeros_like(test_err_n, dtype=int)
        y_pred_t = (test_err_n > thr).astype(int)
    cm_t = confusion_matrix(y_true_t, y_pred_t, labels=[0, 1])
    report_t = classification_report(
        y_true_t, y_pred_t, labels=[0, 1], target_names=["normal", "anomalo"],
        digits=4, zero_division=0,
    )
    mlflow.log_metrics({f"test_{k}": v for k, v in reg_test.items()})
    cm_t_df = pd.DataFrame(cm_t, index=["Real:Normal", "Real:Anomalo"], columns=["Pred:Normal", "Pred:Anomalo"])
    cm_t_path = OUTPUT_ROOT / "cm_test.csv"
    cm_t_df.to_csv(cm_t_path)
    report_test_path = OUTPUT_ROOT / "report_test.txt"
    with open(report_test_path, "w", encoding="utf-8") as f:
        f.write(report_t)
    mlflow.log_artifact(str(cm_t_path), artifact_path="eval_test")
    mlflow.log_artifact(str(report_test_path), artifact_path="eval_test")

    # --- Reconstruction example: first normal test record, one figure per lead ---
    if test_xn.shape[0] > 0:
        ex_in = test_xn[0]
        ex_out = test_yn[0]
        leads = ["II", "V1", "V5"]
        # Time axis in seconds; 500.0 is the sampling rate stated in the notebook title.
        t = np.arange(ex_in.shape[1]) / 500.0
        for i, ld in enumerate(leads):
            plt.figure(figsize=(10, 3))
            plt.plot(t, ex_in[i], label="input")
            plt.plot(t, ex_out[i], label="recon", alpha=0.85)
            plt.title(f"Reconstrucción Test — {ld}")
            plt.xlabel("s")
            plt.tight_layout()
            plt.legend()
            pth = OUTPUT_ROOT / f"recon_{ld}_test.png"
            plt.savefig(pth, dpi=130)
            plt.close()
            mlflow.log_artifact(str(pth), artifact_path="recon_examples")

print("Evaluación completada. Ver artefactos en MLflow.")

In [None]:
# ========================================
# 📈 CELDA 5b — EVALUACIÓN (mejorada) + MATRICES DE CONFUSIÓN LEGIBLES
# ========================================

def reconstruct_errors(model, loader, device=None):
    """Run the autoencoder over ``loader`` and collect reconstruction errors.

    Args:
        model: Module invoked as ``model(batch)``. It is switched to eval mode
            here; it is NOT moved to ``device``, so it must already live there.
        loader: Iterable that yields the input tensors directly (no labels).
            Every batch needs at least three dimensions because the error is
            averaged over axes 1 and 2 (presumably channels and time; confirm
            against the dataset class).
        device: Device the batches are moved to. When omitted, the
            notebook-wide ``DEVICE`` is used, which matches the previous
            behaviour of this function.

    Returns:
        A tuple ``(errors, inputs, reconstructions)`` of NumPy arrays, each
        concatenated over all batches along the first axis. ``errors`` holds
        one mean-squared error per sample.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    target = DEVICE if device is None else device
    model.eval()
    errs, inputs, outputs = [], [], []
    # Gradients are not needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for xb in loader:
            xb = xb.to(target)
            yb = model(xb)
            per_sample_mse = torch.mean((yb - xb) ** 2, dim=(1, 2))
            errs.append(per_sample_mse.detach().cpu().numpy())
            inputs.append(xb.detach().cpu().numpy())
            outputs.append(yb.detach().cpu().numpy())
    if not errs:
        # np.concatenate would raise the same exception type with a vaguer message.
        raise ValueError("loader produced no batches; nothing to reconstruct")
    return np.concatenate(errs), np.concatenate(inputs), np.concatenate(outputs)

def basic_regression_metrics(x_true, x_pred):
    """Compute element-wise reconstruction metrics between two arrays.

    Both arrays are flattened to one dimension first, so every element
    contributes equally regardless of the original layout.

    Returns:
        Dict with keys ``mae``, ``mse``, ``rmse`` and ``r2``.
    """
    flat_true = x_true.reshape(-1)
    flat_pred = x_pred.reshape(-1)
    mse_value = mean_squared_error(flat_true, flat_pred)
    return {
        "mae": mean_absolute_error(flat_true, flat_pred),
        "mse": mse_value,
        # RMSE is derived from the MSE instead of being recomputed from the data.
        "rmse": math.sqrt(mse_value),
        "r2": r2_score(flat_true, flat_pred),
    }

def pick_threshold(errs_norm_val, *, use_mean_std=None, k_std=None, percentile=None):
    """Derive the anomaly threshold from reconstruction errors of normal data.

    Args:
        errs_norm_val: Array-like of reconstruction errors.
        use_mean_std: Choose ``mean + k * std`` (true) or a percentile (false).
            Defaults to the notebook-wide ``USE_MEAN_STD``.
        k_std: Multiplier of the standard deviation. Defaults to ``K_STD``.
        percentile: Percentile in ``[0, 100]``. Defaults to
            ``THRESHOLD_BY_PERCENTILE``.

    Returns:
        Tuple ``(threshold, how)`` where ``how`` is a short label describing
        the rule that produced the threshold.

    Raises:
        ValueError: If ``errs_norm_val`` is empty. Without this check the
            mean/std rule silently yields NaN, and a NaN threshold marks every
            sample as normal.
    """
    errs = np.asarray(errs_norm_val)
    if errs.size == 0:
        raise ValueError("cannot pick a threshold from an empty error array")

    if use_mean_std is None:
        use_mean_std = USE_MEAN_STD

    # Only the configuration value needed by the selected rule is resolved.
    if use_mean_std:
        k = K_STD if k_std is None else k_std
        mu = float(np.mean(errs))
        sd = float(np.std(errs))  # population standard deviation (NumPy default)
        return mu + k * sd, f"mean+{k}*std"

    pct = THRESHOLD_BY_PERCENTILE if percentile is None else percentile
    return float(np.percentile(errs, pct)), f"p{pct}"

def plot_confusion_matrix(cm, labels, title, path_png, normalize=True):
    """Draw a confusion matrix as an annotated heat map and save it to disk.

    Args:
        cm: Two-dimensional array-like of counts; rows are labelled "Real" and
            columns "Pred" on the axes.
        labels: Class names, one per row/column, used for the tick labels.
        title: Figure title.
        path_png: Destination handed to ``fig.savefig``.
        normalize: When true, every row is divided by its sum so the cells
            show per-row fractions (rows that sum to zero stay at zero).
            When false, the raw counts are shown.
    """
    import numpy as _np

    counts = _np.asarray(cm)
    data = counts.astype(float)
    if normalize:
        row_sums = data.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1.0  # avoid 0/0 for classes without samples
        data = data / row_sums
        fmt = ".2f"
    else:
        # `data` is float, so the integer-only "d" format code would raise
        # ValueError; ".0f" prints the same whole numbers.
        fmt = ".0f"

    fig, ax = plt.subplots(figsize=(5, 4))
    try:
        im = ax.imshow(data, interpolation="nearest", cmap="Blues")
        ax.figure.colorbar(im, ax=ax)
        ax.set(
            xticks=_np.arange(counts.shape[1]),
            yticks=_np.arange(counts.shape[0]),
            xticklabels=[f"Pred:{name}" for name in labels],
            yticklabels=[f"Real:{name}" for name in labels],
            ylabel="Real",
            xlabel="Predicción",
            title=title,
        )
        # Cells darker than half of the maximum get white text for contrast.
        thresh = data.max() / 2.0 if data.size else 0.5
        for i in range(counts.shape[0]):
            for j in range(counts.shape[1]):
                ax.text(
                    j, i, format(data[i, j], fmt), ha="center", va="center",
                    color="white" if data[i, j] > thresh else "black",
                )
        fig.tight_layout()
        fig.savefig(path_png, dpi=140)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

# Reload the best checkpoint so the evaluation uses those weights.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
best_path = (OUTPUT_ROOT / "ae_best.pt").resolve()
model.load_state_dict(torch.load(best_path, map_location=DEVICE))

from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score

with mlflow.start_run(run_name="eval_plus", experiment_id=EXP_ID):
    # --- Validation ---
    val_err_n, val_xn, val_yn = reconstruct_errors(model, dl_val_n)
    reg_val = basic_regression_metrics(val_xn, val_yn)
    # The threshold is derived from the normal validation errors only.
    thr, how = pick_threshold(val_err_n)

    if dl_val_a is not None:
        val_err_a, val_xa, val_ya = reconstruct_errors(model, dl_val_a)
        # Ground truth: 0 = normal, 1 = anomalous (integer labels, like y_pred).
        y_true_val = np.concatenate([np.zeros_like(val_err_n), np.ones_like(val_err_a)]).astype(int)
        y_score_val = np.concatenate([val_err_n, val_err_a])
        y_pred_val = (y_score_val > thr).astype(int)
    else:
        y_true_val = np.zeros_like(val_err_n, dtype=int)
        y_score_val = val_err_n
        y_pred_val = (val_err_n > thr).astype(int)

    cm_val = confusion_matrix(y_true_val, y_pred_val, labels=[0, 1])
    acc_val = accuracy_score(y_true_val, y_pred_val)
    prec_val = precision_score(y_true_val, y_pred_val, zero_division=0)
    rec_val = recall_score(y_true_val, y_pred_val, zero_division=0)
    f1_val = f1_score(y_true_val, y_pred_val, zero_division=0)
    balacc_val = balanced_accuracy_score(y_true_val, y_pred_val)
    tn, fp, fn, tp = cm_val.ravel() if cm_val.size == 4 else (0, 0, 0, 0)
    spec_val = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    # Ranking metrics are undefined in some cases (e.g. a single class in
    # y_true); scikit-learn reports that with ValueError, logged here as NaN.
    try:
        auroc_val = roc_auc_score(y_true_val, y_score_val)
    except ValueError:
        auroc_val = float("nan")
    try:
        auprc_val = average_precision_score(y_true_val, y_score_val)
    except ValueError:
        auprc_val = float("nan")

    mlflow.log_param("threshold_how", how)
    mlflow.log_param("threshold_value", thr)
    mlflow.log_metrics({
        **{f"val_{k}": v for k, v in reg_val.items()},
        "val_accuracy": acc_val,
        "val_precision": prec_val,
        "val_recall": rec_val,
        "val_f1": f1_val,
        "val_balanced_accuracy": balacc_val,
        "val_specificity": spec_val,
        "val_auroc": auroc_val,
        "val_auprc": auprc_val,
    })

    cm_val_df = pd.DataFrame(cm_val, index=["Real:Normal", "Real:Anomalo"], columns=["Pred:Normal", "Pred:Anomalo"])
    cm_val_csv = OUTPUT_ROOT / "cm_val.csv"
    cm_val_df.to_csv(cm_val_csv)
    report_val_path = OUTPUT_ROOT / "report_val.txt"
    with open(report_val_path, "w", encoding="utf-8") as f:
        # `labels` must be explicit: with two target names, the report raises
        # when only one class is present in y_true/y_pred.
        f.write(classification_report(
            y_true_val, y_pred_val, labels=[0, 1], target_names=["normal", "anomalo"],
            digits=4, zero_division=0,
        ))
    cm_val_png = OUTPUT_ROOT / "cm_val.png"
    plot_confusion_matrix(cm_val, ["Normal", "Anomalo"], "Matriz de confusión (Val)", cm_val_png, normalize=True)
    mlflow.log_artifact(str(cm_val_csv), artifact_path="eval_val")
    mlflow.log_artifact(str(report_val_path), artifact_path="eval_val")
    mlflow.log_artifact(str(cm_val_png), artifact_path="eval_val")

    # --- Test (reuses the threshold chosen on validation) ---
    test_err_n, test_xn, test_yn = reconstruct_errors(model, dl_test_n)
    reg_test = basic_regression_metrics(test_xn, test_yn)
    if dl_test_a is not None:
        test_err_a, test_xa, test_ya = reconstruct_errors(model, dl_test_a)
        y_true_t = np.concatenate([np.zeros_like(test_err_n), np.ones_like(test_err_a)]).astype(int)
        y_score_t = np.concatenate([test_err_n, test_err_a])
        y_pred_t = (y_score_t > thr).astype(int)
    else:
        y_true_t = np.zeros_like(test_err_n, dtype=int)
        y_score_t = test_err_n
        y_pred_t = (test_err_n > thr).astype(int)

    cm_t = confusion_matrix(y_true_t, y_pred_t, labels=[0, 1])
    acc_t = accuracy_score(y_true_t, y_pred_t)
    prec_t = precision_score(y_true_t, y_pred_t, zero_division=0)
    rec_t = recall_score(y_true_t, y_pred_t, zero_division=0)
    f1_t = f1_score(y_true_t, y_pred_t, zero_division=0)
    balacc_t = balanced_accuracy_score(y_true_t, y_pred_t)
    tn, fp, fn, tp = cm_t.ravel() if cm_t.size == 4 else (0, 0, 0, 0)
    spec_t = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    try:
        auroc_t = roc_auc_score(y_true_t, y_score_t)
    except ValueError:
        auroc_t = float("nan")
    try:
        auprc_t = average_precision_score(y_true_t, y_score_t)
    except ValueError:
        auprc_t = float("nan")

    mlflow.log_metrics({
        **{f"test_{k}": v for k, v in reg_test.items()},
        "test_accuracy": acc_t,
        "test_precision": prec_t,
        "test_recall": rec_t,
        "test_f1": f1_t,
        "test_balanced_accuracy": balacc_t,
        "test_specificity": spec_t,
        "test_auroc": auroc_t,
        "test_auprc": auprc_t,
    })

    cm_t_df = pd.DataFrame(cm_t, index=["Real:Normal", "Real:Anomalo"], columns=["Pred:Normal", "Pred:Anomalo"])
    cm_t_csv = OUTPUT_ROOT / "cm_test.csv"
    cm_t_df.to_csv(cm_t_csv)
    report_test_path = OUTPUT_ROOT / "report_test.txt"
    with open(report_test_path, "w", encoding="utf-8") as f:
        f.write(classification_report(
            y_true_t, y_pred_t, labels=[0, 1], target_names=["normal", "anomalo"],
            digits=4, zero_division=0,
        ))
    cm_t_png = OUTPUT_ROOT / "cm_test.png"
    plot_confusion_matrix(cm_t, ["Normal", "Anomalo"], "Matriz de confusión (Test)", cm_t_png, normalize=True)
    mlflow.log_artifact(str(cm_t_csv), artifact_path="eval_test")
    mlflow.log_artifact(str(report_test_path), artifact_path="eval_test")
    mlflow.log_artifact(str(cm_t_png), artifact_path="eval_test")

    # --- Reconstruction example: first normal test record, one figure per lead ---
    if test_xn.shape[0] > 0:
        ex_in = test_xn[0]
        ex_out = test_yn[0]
        leads = ["II", "V1", "V5"]
        # Time axis in seconds; 500.0 is the sampling rate stated in the notebook title.
        t = np.arange(ex_in.shape[1]) / 500.0
        for i, ld in enumerate(leads):
            plt.figure(figsize=(10, 3))
            plt.plot(t, ex_in[i], label="input")
            plt.plot(t, ex_out[i], label="recon", alpha=0.85)
            plt.title(f"Reconstrucción Test — {ld}")
            plt.xlabel("s")
            plt.tight_layout()
            plt.legend()
            pth = OUTPUT_ROOT / f"recon_{ld}_test.png"
            plt.savefig(pth, dpi=130)
            plt.close()
            mlflow.log_artifact(str(pth), artifact_path="recon_examples")

print("Evaluación mejorada completada. Ver artefactos en MLflow.")


## ✍️ ¿Dónde cambiar parámetros?

- **Rutas y archivos**: Celda **1** (`OUTPUT_ROOT`, nombres `.dat/.npy`).  
- **Hiperparámetros**: Celda **1** (`EPOCHS`, `BATCH_SIZE`, `LR`, `WEIGHT_DECAY`, `SEED`).  
- **Modelo**: Celda **1** (`MODEL_CFG`: `base_filters`, `kernels`, `leak`, etc.).  
- **Pérdida**: Celda **4** (`LOSS_FN="mse"`).  
- **Umbral**: Celda **1** (`THRESHOLD_BY_PERCENTILE`, `USE_MEAN_STD`, `K_STD`).  
- **MLflow**: Celda **1** (`MLFLOW_TRACKING_URI`, `EXPERIMENT_NAME`).  
- **Prefect**: El *flow* y las *tasks* están en la **Celda 4**; agrega más tareas según necesites.

In [None]:
import torch, torch.nn as nn

# GPU smoke test: one Conv1d forward and backward pass on the first CUDA device.
# Fail early with a readable message instead of a low-level CUDA initialisation error.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA no está disponible: no se puede ejecutar la prueba de Conv1d en GPU.")

dev = torch.device("cuda:0")
m = nn.Conv1d(3, 16, kernel_size=7, padding=3).to(dev)
x = torch.randn(4, 3, 2500, device=dev)  # 3 input channels, matching the layer above
y = m(x)
# Backpropagate a scalar so the backward kernels are exercised as well.
(y.pow(2).mean()).backward()
print("Conv1d forward/backward en", dev, "OK ✅")
