# Escenario M (w=60 → h=5)

# Evaluación de Ensembles (GRU, CNN, Transformer)

Este cuaderno:
- Carga datos y reutiliza el mismo **MinMaxScaler** ajustado con *train*.
- Construye el dataset de **test** para el escenario especificado.
- Calcula métricas **log_cosh** y **AUTC** para cada modelo individual.
- Evalúa 6 esquemas de **promediado** (simple, ponderado por el inverso de `log_cosh`, ponderado por `AUTC`, y mezclas `log_cosh`/`AUTC` 25/75, 50/50, 75/25).
- Imprime una tabla con resultados.


In [1]:
import os, glob
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from joblib import load, dump


In [2]:
# --- Parameters ---
CSV_PATH = './data/cierres_diarios_2005_2025n.csv'
SCALER_PATH = 'scaler_modelos.joblib'

# --- Load the CSV, using the parsed 'Date' column as the index ---
df = pd.read_csv(CSV_PATH, parse_dates=['Date'], index_col='Date')

# --- Fill gaps: forward-fill first, then back-fill whatever is still missing ---
df = df.ffill().bfill()

# --- Positional 70/20/10 split on the raw (unscaled) rows ---
# NOTE(review): assumes the CSV rows are already in date order — confirm.
n = len(df)
train_end, val_end = int(n*0.7), int(n*0.9)
train_raw = df.iloc[:train_end]
val_raw   = df.iloc[train_end:val_end]
test_raw  = df.iloc[val_end:]

# --- Reuse the persisted scaler when present; otherwise fit on train ONLY and persist it ---
if not os.path.exists(SCALER_PATH):
    scaler = MinMaxScaler().fit(train_raw)
    dump(scaler, SCALER_PATH)
else:
    scaler = load(SCALER_PATH)


def _scale_frame(raw):
    """Apply the shared scaler to *raw*, keeping its index/columns, as float32."""
    scaled = scaler.transform(raw)
    return pd.DataFrame(scaled, index=raw.index, columns=raw.columns).astype("float32")


# --- Every split goes through the SAME scaler ---
train_df = _scale_frame(train_raw)
val_df   = _scale_frame(val_raw)
test_df  = _scale_frame(test_raw)


In [3]:
def make_dataset(data, window_size, horizon, batch_size=32, shuffle=True):
    """Build a batched dataset of (input window, target horizon) pairs.

    Sequences of ``window_size + horizon`` consecutive rows are cut from
    ``data.values`` with stride 1. Each batch is then split along axis 1:
    the first ``window_size`` steps become the inputs and the remaining
    steps become the targets, both cast to float32.

    Args:
        data: Object exposing ``.values`` (the cells above pass DataFrames).
        window_size: Number of time steps in each input window.
        horizon: Number of time steps in each target.
        batch_size: Sequences per batch.
        shuffle: Forwarded to ``timeseries_dataset_from_array``.

    Returns:
        The mapped dataset yielding ``(inputs, targets)`` tuples.
    """
    total_steps = window_size + horizon

    def _split_window(seq):
        # Axis 1 is time: everything before `window_size` is history,
        # everything from there on is what the model must predict.
        history = tf.cast(seq[:, :window_size, :], tf.float32)
        future = tf.cast(seq[:, window_size:, :], tf.float32)
        return history, future

    sequences = tf.keras.preprocessing.timeseries_dataset_from_array(
        data=data.values,
        targets=None,
        sequence_length=total_steps,
        sequence_stride=1,
        batch_size=batch_size,
        shuffle=shuffle,
    )
    return sequences.map(_split_window)


In [4]:
# --- Metrics (log_cosh + AUTC) ---
# Absolute-error tolerances at which the "within eps" hit rate is measured;
# they are also the x-axis points integrated by compute_autc_from_results.
eps_list = [0.005, 0.01, 0.02, 0.05, 0.1]

def _eps_tag(e: float) -> str:
    """Return *e* as a key-safe tag: 6-decimal text, trailing zeros dropped, '.' -> '_'.

    Examples: ``0.005 -> "0_005"``, ``0.1 -> "0_1"``, ``1.0 -> "1"``.
    """
    whole, _, fraction = format(e, ".6f").partition(".")
    fraction = fraction.rstrip("0")
    # With no significant decimals left, the separator is dropped as well.
    return f"{whole}_{fraction}" if fraction else whole

def _get_metric_value_relaxed(res: dict, base: str) -> float:
    """Look up metric *base* in *res*, tolerating suffixed key names.

    An exact key match wins. Otherwise the value of the first key (in dict
    iteration order) that starts with *base* is returned. If nothing
    matches, NaN is returned.
    """
    if base not in res:
        prefixed = (value for key, value in res.items() if key.startswith(base))
        return next(prefixed, float("nan"))
    return res[base]

def compute_autc_from_results(res: dict, eps_list) -> float:
    """Return the normalised area under the tolerance curve (AUTC).

    For every tolerance in *eps_list* (sorted ascending) the matching
    ``within_eps_<tag>`` value is looked up in *res*. The finite points are
    integrated with the trapezoidal rule, and the area is divided by the
    width of the covered tolerance range.

    Args:
        res: Mapping from metric name to value, e.g. the output of
            ``within_eps_dict``.
        eps_list: Iterable of tolerances.

    Returns:
        The normalised area as a float, or NaN when fewer than two finite
        points are available or when the covered tolerance range has zero
        width (e.g. duplicated tolerances).
    """
    eps = np.array(sorted(eps_list), dtype=np.float64)
    acc = np.array(
        [_get_metric_value_relaxed(res, f"within_eps_{_eps_tag(e)}") for e in eps],
        dtype=np.float64,
    )

    # Missing metrics come back as NaN; integrate only over the valid points.
    mask = np.isfinite(acc)
    if mask.sum() < 2:
        return float("nan")
    eps_ok, acc_ok = eps[mask], acc[mask]

    span = eps_ok[-1] - eps_ok[0]
    if span <= 0:
        # A zero-width range would be 0/0; report it as "undefined" explicitly.
        return float("nan")

    # NumPy 2.0 renamed `trapz` to `trapezoid` and deprecated the old name;
    # prefer the new one and fall back for older NumPy releases.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    return float(trapezoid(acc_ok, eps_ok) / span)

def log_cosh_np(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the mean of ``log(cosh(y_pred - y_true))`` over all elements.

    The naive ``np.log(np.cosh(x))`` overflows to ``inf`` once ``|x|``
    exceeds roughly 89 in float32 (roughly 710 in float64). This version
    uses the equivalent identity
    ``log(cosh(x)) = |x| + log1p(exp(-2|x|)) - log(2)``, which stays finite
    for any finite error, and accumulates in float64.

    Args:
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values, broadcastable against *y_true*.

    Returns:
        The mean log-cosh error as a Python float.
    """
    err = np.abs(
        np.asarray(y_pred, dtype=np.float64) - np.asarray(y_true, dtype=np.float64)
    )
    per_element = err + np.log1p(np.exp(-2.0 * err)) - np.log(2.0)
    # log(cosh(x)) is never negative; clip the ~1e-16 rounding residue near 0.
    return float(np.mean(np.maximum(per_element, 0.0)))

def within_eps_dict(y_true: np.ndarray, y_pred: np.ndarray, eps_list) -> dict:
    """Return, per tolerance, the fraction of elements with ``|y_pred - y_true| <= eps``.

    Keys are ``within_eps_<tag>`` with the tag produced by ``_eps_tag``.
    The fraction is taken over every element of the error array, whatever
    its shape.
    """
    deviation = np.abs(y_pred - y_true)
    return {
        f"within_eps_{_eps_tag(tol)}": float(np.mean(deviation <= tol))
        for tol in eps_list
    }


In [5]:
def evaluate_model(model: tf.keras.Model, test_ds: tf.data.Dataset, eps_list):
    """Run *model* over *test_ds* once and score its predictions.

    Every ``(inputs, targets)`` batch is pushed through the model with
    ``training=False``. Targets and predictions are concatenated along
    axis 0 before the metrics are computed.

    Returns:
        A dict with keys ``"log_cosh"``, ``"AUTC"``, ``"within"`` (the
        per-tolerance hit rates), ``"y_true"`` and ``"y_pred"`` (the
        concatenated arrays).
    """
    # Single pass over the dataset; each entry is (targets, predictions).
    batches = [
        (targets.numpy(), model(inputs, training=False).numpy())
        for inputs, targets in test_ds
    ]
    y_true = np.concatenate([truth for truth, _ in batches], axis=0)
    y_pred = np.concatenate([guess for _, guess in batches], axis=0)

    hit_rates = within_eps_dict(y_true, y_pred, eps_list)
    summary = {"log_cosh": log_cosh_np(y_true, y_pred)}
    summary["AUTC"] = compute_autc_from_results(hit_rates, eps_list)
    summary["within"] = hit_rates
    summary["y_true"] = y_true
    summary["y_pred"] = y_pred
    return summary

def weighted_average(preds_list, weights):
    """Blend same-shaped prediction arrays using *weights* normalised to sum to 1.

    Args:
        preds_list: Sequence of arrays with identical shape, one per model.
        weights: One weight per entry of *preds_list*; rescaled internally
            (the caller's object is not modified).

    Returns:
        The weighted sum of the predictions, with the shape of one entry.
    """
    raw = np.asarray(weights, dtype=np.float64)
    normalised = raw / raw.sum()
    members = np.stack(preds_list, axis=0)  # new leading axis indexes the models
    # Contract the weight vector against the model axis.
    return np.tensordot(normalised, members, axes=([0], [0]))

def mix_weights(res_list, alpha_log: float):
    """Blend log_cosh-based and AUTC-based model weights.

    Each entry of *res_list* must provide ``"log_cosh"`` and ``"AUTC"``.
    log_cosh is lower-is-better, so its reciprocal is used as the score;
    AUTC is used as-is. Both score vectors are normalised to sum to 1 and
    combined as ``alpha_log * log_part + (1 - alpha_log) * autc_part``.

    Returns:
        A float64 array with one weight per entry of *res_list*.
    """
    def _unit_sum(scores):
        vec = np.array(scores, dtype=np.float64)
        return vec / vec.sum()

    from_log = _unit_sum([1/r["log_cosh"] for r in res_list])
    from_autc = _unit_sum([r["AUTC"] for r in res_list])
    return alpha_log * from_log + (1.0 - alpha_log) * from_autc

def run_all_ensembles(model_gru, model_cnn, model_trans, test_ds, eps_list):
    """Score the three models individually and under six averaging schemes.

    Each model is evaluated on *test_ds* with ``evaluate_model``. Their
    predictions are then combined with ``weighted_average`` using equal
    weights, inverse-log_cosh weights, AUTC weights, and three
    log_cosh/AUTC mixes from ``mix_weights``.

    Args:
        model_gru, model_cnn, model_trans: Models accepted by
            ``evaluate_model``.
        test_ds: Dataset of ``(inputs, targets)`` batches. It is iterated
            once per model and must yield the same samples in the same
            order every time (i.e. be built without shuffling).
        eps_list: Tolerances used for the within-eps / AUTC metrics.

    Returns:
        ``{"individual": {...}, "ensembles": {...}}`` where every leaf is a
        dict with ``"log_cosh"`` and ``"AUTC"``.

    Raises:
        ValueError: If the targets seen while evaluating the models differ,
            which means the predictions cannot be aligned for averaging.

    NOTE(review): the ensemble weights are derived from metrics measured on
    the same dataset the ensembles are then scored on, so the weighted
    results are optimistically biased; consider deriving weights from a
    validation split.
    """
    r_gru  = evaluate_model(model_gru,  test_ds, eps_list)
    r_cnn  = evaluate_model(model_cnn,  test_ds, eps_list)
    r_tran = evaluate_model(model_trans, test_ds, eps_list)

    y_true  = r_gru["y_true"]
    # Only the GRU pass supplies y_true below. If the dataset reorders its
    # samples between passes, the other predictions would be averaged and
    # scored against the wrong targets without any error.
    for label, other in (("CNN", r_cnn), ("TRANS", r_tran)):
        if not np.array_equal(y_true, other["y_true"], equal_nan=True):
            raise ValueError(
                f"test_ds devolvió objetivos distintos al evaluar {label}; "
                "construya el dataset con shuffle=False para que las "
                "predicciones queden alineadas."
            )

    preds   = [r_gru["y_pred"], r_cnn["y_pred"], r_tran["y_pred"]]
    reslist = [r_gru, r_cnn, r_tran]

    # Raw (un-normalised) weights per scheme; weighted_average rescales them.
    schemes = {
        "simple":               [1.0, 1.0, 1.0],
        # log_cosh is lower-is-better, hence the reciprocal.
        "ponderado_log":        [1/r["log_cosh"] for r in reslist],
        "ponderado_autc":       [r["AUTC"] for r in reslist],
        "mix_25log_75autc":     mix_weights(reslist, alpha_log=0.25),
        "mix_50log_50autc":     mix_weights(reslist, alpha_log=0.50),
        "mix_75log_25autc":     mix_weights(reslist, alpha_log=0.75),
    }

    def eval_from_pred(y_true, y_pred):
        # Same two headline metrics that evaluate_model reports.
        res = within_eps_dict(y_true, y_pred, eps_list)
        return {"log_cosh": log_cosh_np(y_true, y_pred),
                "AUTC": compute_autc_from_results(res, eps_list)}

    results = {
        "individual": {
            "GRU":  {"log_cosh": r_gru["log_cosh"],  "AUTC": r_gru["AUTC"]},
            "CNN":  {"log_cosh": r_cnn["log_cosh"],  "AUTC": r_cnn["AUTC"]},
            "TRANS":{"log_cosh": r_tran["log_cosh"], "AUTC": r_tran["AUTC"]},
        },
        "ensembles": {}
    }

    for name, w in schemes.items():
        y_pred_ens = weighted_average(preds, w)
        results["ensembles"][name] = eval_from_pred(y_true, y_pred_ens)

    return results

def pick_model(pattern: str) -> str:
    """Return the most recently modified path matching the glob *pattern*.

    Raises:
        FileNotFoundError: If the pattern matches nothing.
    """
    candidates = glob.glob(pattern)
    if candidates:
        # Newest by modification time; on ties the earliest match wins.
        return max(candidates, key=os.path.getmtime)
    raise FileNotFoundError(f"No se encontraron modelos con patrón: {pattern}")


In [6]:
# === Scenario M: window=60, horizon=5 ===
scenario_m = dict(window_size=60, horizon=5, batch_size=32)
train_dsm = make_dataset(train_df, shuffle=True,  **scenario_m)
val_dsm   = make_dataset(val_df,   shuffle=False, **scenario_m)
test_dsm  = make_dataset(test_df,  shuffle=False, **scenario_m)  # <- the one evaluated below

# One saved model per architecture for this scenario.
gru_path  = pick_model("./models_gru_huber_sweep/gru_huber_w60_h5_delta0_1_M.keras")
cnn_path  = pick_model("./models_cnn_huber_sweep/cnn_huber_w60_h5_delta0_02_M.keras")
tran_path = pick_model("./models_transformer_huber_sweep/transformer_huber_w60_h5_delta0_1_M.keras")

# compile=False: the models are only used for inference here.
model_gru, model_cnn, model_trans = (
    tf.keras.models.load_model(path, compile=False)
    for path in (gru_path, cnn_path, tran_path)
)

results_M = run_all_ensembles(model_gru, model_cnn, model_trans, test_dsm, eps_list)

print("Resultados individuales (M - w60 h5):")
for label, scores in results_M["individual"].items():
    print(f"{label:>9} -> log_cosh={scores['log_cosh']:.6f} | AUTC={scores['AUTC']:.6f}")

print("\nResultados ensembles (M):")
for label, scores in results_M["ensembles"].items():
    print(f"{label:>20} -> log_cosh={scores['log_cosh']:.6f} | AUTC={scores['AUTC']:.6f}")


2025-09-23 23:15:34.724814: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-09-23 23:15:34.724874: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-09-23 23:15:34.724892: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-09-23 23:15:34.724935: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-09-23 23:15:34.724965: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-09-23 23:15:35.635152: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-09-23 23:15:35.970057: W ten

Resultados individuales (M - w60 h5):
      GRU -> log_cosh=1.347702 | AUTC=0.047233
      CNN -> log_cosh=1.466199 | AUTC=0.042731
    TRANS -> log_cosh=1.761740 | AUTC=0.013818

Resultados ensembles (M):
              simple -> log_cosh=1.286618 | AUTC=0.068660
       ponderado_log -> log_cosh=1.274581 | AUTC=0.073878
      ponderado_autc -> log_cosh=1.248080 | AUTC=0.077785
    mix_25log_75autc -> log_cosh=1.251745 | AUTC=0.074718
    mix_50log_50autc -> log_cosh=1.257388 | AUTC=0.072351
    mix_75log_25autc -> log_cosh=1.265005 | AUTC=0.073756


2025-09-23 23:15:36.227482: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
