In [None]:
# Download stock-market index closes from Yahoo Finance.
# -- Uncomment (remove the surrounding triple quotes) only once; every later run
#    can read the CSV that this cell writes instead of downloading again.
"""
import yfinance as yf

tickers = [
    "^GSPC","^IXIC","^DJI","^RUT",
    "^FTSE","^GDAXI","^FCHI","^125904-USD-STRD","^IBEX",
    "^N225","^HSI","000001.SS","^KS11","^BSESN",
    "^GSPTSE","^BVSP","^MXX","^MERV",
    "^AXJO","^NZ50",
    "ES=F","NQ=F","YM=F","ZT=F","^VIX"
]

# Descarga de cierres diarios sin agrupar por ticker
data = yf.download(
    tickers,
    start="2005-01-01",
    end="2025-01-02"
)

cierres = data["Close"]  # DataFrame con cada ticker como columna
cierres.to_csv("./data/cierres_diarios_2005_2025.csv")
"""

In [1]:
# Load the previously downloaded data for training.
# -- Make sure the path and the file name are correct.
# NOTE(review): the download cell writes "./data/cierres_diarios_2005_2025.csv",
# while this cell reads "...2025n.csv" (extra "n"). Confirm this is a deliberately
# different file and not a typo.

import pandas as pd
df = pd.read_csv('./data/cierres_diarios_2005_2025n.csv', parse_dates=['Date'], index_col='Date')

In [2]:
# Data preparation
import pandas as pd
from joblib import dump
from sklearn.preprocessing import MinMaxScaler

# A column without a single observation cannot be filled by ffill/bfill; it would
# stay NaN through the scaler and poison training. Drop such columns and say so.
empty_cols = df.columns[df.isna().all()].tolist()
if empty_cols:
    print(f"[WARN] Dropping columns with no data: {empty_cols}")
    df = df.drop(columns=empty_cols)

# Fill missing values without in-place mutation: forward-fill first (uses only
# past values), then back-fill to cover leading rows that have no earlier value.
df = df.ffill().bfill()

# Raw chronological split (before scaling!): 70% train / 20% val / 10% test
n = len(df)
train_end, val_end = int(n * 0.7), int(n * 0.9)
train_raw = df.iloc[:train_end]
val_raw = df.iloc[train_end:val_end]
test_raw = df.iloc[val_end:]

# Fit the scaler on TRAIN only so no val/test statistics leak into it
scaler = MinMaxScaler()
scaler.fit(train_raw)


def _scale_split(raw_split):
    """Apply the train-fitted scaler to one split, keeping index/columns, as float32."""
    scaled = scaler.transform(raw_split)
    return pd.DataFrame(scaled, index=raw_split.index, columns=raw_split.columns).astype("float32")


# Transform every split with that same scaler
train_df = _scale_split(train_raw)
val_df = _scale_split(val_raw)
test_df = _scale_split(test_raw)

# Persist the scaler for the S/M/L scenarios
dump(scaler, "scaler_modelos.joblib")


import tensorflow as tf

def make_dataset(data, window_size, horizon, batch_size=32, shuffle=True):
    """Build a batched dataset of (input window, target horizon) pairs.

    Each sequence has `window_size + horizon` consecutive rows of `data.values`;
    the first `window_size` steps become the inputs and the remaining steps the
    targets. Both parts are cast to float32.

    Args:
        data: object exposing `.values` (the array that is windowed).
        window_size: number of leading time steps used as model input.
        horizon: number of trailing time steps used as target.
        batch_size: batch size passed to the windowing utility.
        shuffle: whether the windowing utility shuffles the sequences.

    Returns:
        The mapped dataset yielding `(inputs, targets)` tuples.
    """
    full_length = window_size + horizon

    def _split_inputs_targets(seq):
        # seq is indexed as (batch, time, feature); cut it along the time axis.
        inputs = tf.cast(seq[:, :window_size, :], tf.float32)
        targets = tf.cast(seq[:, window_size:, :], tf.float32)
        return (inputs, targets)

    sequences = tf.keras.preprocessing.timeseries_dataset_from_array(
        data=data.values,
        targets=None,
        sequence_length=full_length,
        sequence_stride=1,
        batch_size=batch_size,
        shuffle=shuffle,
    )
    return sequences.map(_split_inputs_targets)


# Build the datasets for every scenario; only the training split is shuffled.
def _make_scenario_datasets(window_size, horizon, batch_size=32):
    """Return the (train, val, test) datasets for one window/horizon setting."""
    return tuple(
        make_dataset(split_df, window_size=window_size, horizon=horizon,
                     batch_size=batch_size, shuffle=do_shuffle)
        for split_df, do_shuffle in ((train_df, True), (val_df, False), (test_df, False))
    )


# Scenario S: window 20 -> horizon 1
train_dss, val_dss, test_dss = _make_scenario_datasets(20, 1)

# Scenario M: window 60 -> horizon 5
train_dsm, val_dsm, test_dsm = _make_scenario_datasets(60, 5)

# Scenario L: window 120 -> horizon 20
train_dsl, val_dsl, test_dsl = _make_scenario_datasets(120, 20)

2025-09-19 08:49:21.392940: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-09-19 08:49:21.392967: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-09-19 08:49:21.392973: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-09-19 08:49:21.392988: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-09-19 08:49:21.392999: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# =========================================================
# TRANSFORMER + Huber (sweep delta)
# Selección de δ por métrica común: val_log_cosh
# Grid de δ calibrado con cuantiles de |e| (baseline persistencia) en VAL
# Guardado .keras (Flask-ready, compile=False)
# =========================================================
import os
import numpy as np
import tensorflow as tf
from typing import List, Tuple
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, LayerNormalization, Embedding,
    MultiHeadAttention, Reshape
)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# ---------- Single shared scaler (MinMax) ----------
# Reuse the `scaler` name if it is already defined in this session; otherwise
# fall back to the copy persisted on disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load a scaler file that you produced or otherwise trust.
try:
    scaler
except NameError:
    from joblib import load
    scaler = load("scaler_modelos.joblib")

# ---------- Config ----------
# Directory where the per-delta model checkpoints are written.
MODEL_DIR = "./models_transformer_huber_sweep"
os.makedirs(MODEL_DIR, exist_ok=True)

# Tolerances for the "within eps" metrics (normalized [0,1] scale)
EPS_LIST = [0.005, 0.01, 0.02, 0.05, 0.1]

# ---------- Utils ----------
def infer_shapes_from_dataset(ds: tf.data.Dataset) -> Tuple[int, int, int]:
    """Read (window, n_features, horizon) from the first batch of `ds`.

    The window and feature count come from axes 1 and 2 of the inputs, and the
    horizon from axis 1 of the targets.

    Raises:
        ValueError: if `ds` yields no batch at all.
    """
    first_batch = next(iter(ds.take(1)), None)
    if first_batch is None:
        raise ValueError("Dataset vacío")
    inputs, targets = first_batch
    window = int(inputs.shape[1])
    n_features = int(inputs.shape[2])
    horizon = int(targets.shape[1])
    return window, n_features, horizon

def _eps_tag(x: float) -> str:
    s = f"{x:.10g}"
    s = s.rstrip('0').rstrip('.') if '.' in s else s
    return s.replace('.', '_')

# ---------- Métricas ----------
def log_cosh_metric(y_true, y_pred):
    """Mean of log(cosh(error)) over all elements, in a numerically stable form.

    Uses the identity log(cosh(e)) = |e| + softplus(-2|e|) - log(2), which avoids
    evaluating cosh directly. Both inputs are cast to float32 first.
    """
    abs_err = tf.abs(tf.cast(y_pred, tf.float32) - tf.cast(y_true, tf.float32))
    per_element = abs_err + tf.nn.softplus(-2.0 * abs_err) - tf.math.log(2.0)
    return tf.reduce_mean(per_element)
# Short name so Keras reports it as "log_cosh" / "val_log_cosh".
log_cosh_metric.__name__ = "log_cosh"

def make_within_eps_vector_metric(eps_vec: np.ndarray, tag: str):
    """Create a metric giving the fraction of predictions within a tolerance.

    Args:
        eps_vec: per-feature tolerances, shape (F,); converted to float32.
        tag: value assigned to the returned function's `__name__`.

    Returns:
        A function `(y_true, y_pred) -> scalar` with the mean hit rate, where a
        hit is an element whose absolute error is <= its feature's tolerance.
    """
    tolerances = tf.constant(eps_vec.astype(np.float32), dtype=tf.float32)  # (F,)

    def within_eps(y_true, y_pred):
        abs_err = tf.abs(tf.cast(y_pred, tf.float32) - tf.cast(y_true, tf.float32))  # (B,H,F)
        # Add two leading axes so the (F,) tolerances broadcast against (B,H,F).
        inside = abs_err <= tolerances[tf.newaxis, tf.newaxis, :]
        return tf.reduce_mean(tf.cast(inside, tf.float32))

    within_eps.__name__ = tag
    return within_eps

def build_within_metrics_minmax(scaler, eps_list: List[float], n_features: int):
    """Assemble the metric list: log-cosh plus one within-eps metric per tolerance.

    The scaler is only validated here (it must expose `data_range_` with one
    entry per feature); every tolerance is applied uniformly to all features.

    Raises:
        ValueError: if `scaler` has no `data_range_` or its length differs from
            `n_features`.
    """
    if not hasattr(scaler, "data_range_"):
        raise ValueError("Se esperaba MinMaxScaler con data_range_.")
    if len(scaler.data_range_) != n_features:
        raise ValueError("scaler.data_range_ no coincide con n_features.")

    within_metrics = [
        make_within_eps_vector_metric(
            np.full((n_features,), float(eps), dtype=np.float32),
            f"within_eps_{_eps_tag(eps)}",
        )
        for eps in eps_list
    ]
    return [log_cosh_metric, *within_metrics]

def _get_metric_value_relaxed(res: dict, base_name: str):
    """
    Devuelve res[base_name] si existe; si no, busca cualquier clave que empiece por base_name + '_'
    (para cubrir sufijos que Keras añade como '_1', '_2', etc.). Si no hay match, devuelve NaN.
    """
    if base_name in res and np.isfinite(res[base_name]):
        return float(res[base_name])
    # Busca con sufijo
    for k, v in res.items():
        if k.startswith(base_name + "_"):
            try:
                v = float(v)
                if np.isfinite(v):
                    return v
            except Exception:
                pass
    return float("nan")

def compute_autc_from_results(res: dict, eps_list) -> float:
    """Normalized area under the tolerance curve (hit rate vs. eps).

    For every eps the matching "within_eps_<tag>" value is read from `res`
    (suffix-tolerant lookup). The curve is integrated with the trapezoidal rule
    over the eps values that have a finite metric and divided by the covered
    eps span.

    Args:
        res: metric name -> value mapping, e.g. from `model.evaluate(..., return_dict=True)`.
        eps_list: tolerances; duplicates are ignored so the span cannot be zero.

    Returns:
        The normalized area, or NaN when fewer than two finite points exist.
    """
    eps = np.array(sorted({float(e) for e in eps_list}), dtype=np.float64)
    acc = []
    missing = []
    for e in eps:
        base = f"within_eps_{_eps_tag(e)}"
        val = _get_metric_value_relaxed(res, base)
        acc.append(val)
        if not np.isfinite(val):
            missing.append(base)
    acc = np.array(acc, dtype=np.float64)

    # Diagnostics to make a missing or renamed metric easy to spot
    if missing:
        print("[AUTC] Aviso: no se encontraron métricas:", missing)
        print("[AUTC] Claves disponibles:", sorted([k for k in res.keys() if "within_eps_" in k]))

    mask = np.isfinite(acc)
    if mask.sum() < 2:
        return float("nan")

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; use the
    # new name when it exists and fall back on older NumPy versions.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    eps_used = eps[mask]
    return float(trapezoid(acc[mask], eps_used) / (eps_used[-1] - eps_used[0]))


# ---------- Bloque Transformer ----------
def transformer_encoder(x, d_model: int, num_heads: int, ff_dim: int, dropout: float):
    """Apply one encoder block: self-attention, then a two-layer feed-forward net.

    Each sub-layer output goes through dropout, is added back to its input
    (residual connection) and is then layer-normalized.

    Args:
        x: input tensor of the block.
        d_model: output width of the feed-forward net; also sets the per-head
            key size as `d_model // num_heads`.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward net.
        dropout: dropout rate used inside attention and after each sub-layer.

    Returns:
        The transformed tensor.
    """
    # --- Self-attention sub-layer (query and value are both x) ---
    self_attention = MultiHeadAttention(
        num_heads=num_heads,
        key_dim=d_model // num_heads,
        dropout=dropout,
    )
    attended = self_attention(x, x)
    attention_norm = LayerNormalization(epsilon=1e-6)
    attended = Dropout(dropout)(attended)
    x = attention_norm(x + attended)

    # --- Position-wise feed-forward sub-layer ---
    hidden = Dense(ff_dim, activation='relu')(x)
    hidden = Dropout(dropout)(hidden)
    projected = Dense(d_model)(hidden)
    feedforward_norm = LayerNormalization(epsilon=1e-6)
    projected = Dropout(dropout)(projected)
    x = feedforward_norm(x + projected)
    return x

# ---------- Modelo Transformer ----------
def build_transformer_point_model(window: int, n_features: int, horizon: int,
                                  d_model: int = 128, num_heads: int = 4,
                                  num_layers: int = 2, ff_dim: int = 256,
                                  dropout: float = 0.1) -> Model:
    """Build a Transformer encoder that maps a window to a multi-step point forecast.

    Args:
        window: number of input time steps.
        n_features: number of features per time step (inputs and outputs).
        horizon: number of future steps predicted.
        d_model: width of the projected inputs and of the encoder blocks.
        num_heads: attention heads per encoder block.
        num_layers: number of stacked encoder blocks.
        ff_dim: hidden width of the feed-forward nets and of the output head.
        dropout: dropout rate used throughout.

    Returns:
        A Keras Model taking (window, n_features) inputs and producing
        (horizon, n_features) outputs.
    """
    inp = Input(shape=(window, n_features))
    x = Dense(d_model)(inp)  # (B, W, d_model)

    # Positional information: one embedding vector per time-step index.
    # NOTE(review): the Embedding layer is called on a concrete tensor (tf.range),
    # not on a symbolic input. Depending on the Keras version its output may be
    # captured as a constant, leaving the positional weights outside
    # model.trainable_weights — confirm by inspecting the built model.
    positions = tf.range(start=0, limit=window, delta=1)
    pos_emb = Embedding(input_dim=window, output_dim=d_model)(positions)  # (W, d_model)
    x = x + pos_emb  # broadcast over the batch dimension

    for _ in range(num_layers):
        x = transformer_encoder(x, d_model, num_heads, ff_dim, dropout)

    # Only the representation of the last time step feeds the forecasting head.
    x_last = x[:, -1, :]  # (B, d_model)

    h = Dense(ff_dim, activation='relu')(x_last)
    h = Dropout(dropout)(h)
    # Flat output, reshaped below into one row per forecast step.
    h = Dense(horizon * n_features)(h)
    out = Reshape((horizon, n_features))(h)
    return Model(inp, out, name=f"TRANSFORMER_POINT_H{horizon}_F{n_features}")

# ---------- Pérdida Huber ----------
def make_huber_loss(delta: float):
    """Create a Huber loss function for the given `delta`.

    The returned function casts both arguments to float32 before delegating to
    `tf.keras.losses.Huber`, and is named "huber_delta_<tag>" after the delta.
    """
    huber = tf.keras.losses.Huber(delta=float(delta))

    def huber_loss(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)
        predictions = tf.cast(y_pred, tf.float32)
        return huber(targets, predictions)

    huber_loss.__name__ = f"huber_delta_{_eps_tag(delta)}"
    return huber_loss

# ---------- Calibración del grid de δ (cuantiles de |e| con baseline persistencia) ----------
def estimate_error_quantiles_persistence(val_ds: tf.data.Dataset, max_batches: int = 256):
    """Quantiles of |error| for a persistence baseline over `val_ds`.

    The baseline forecast repeats the last input step across the whole horizon.
    Absolute errors of up to `max_batches` batches are pooled (all features and
    steps together) and their p50, p75, p90 and p95 are returned as a list.
    If the dataset yields no batch, a fixed default grid is returned instead.
    """
    pooled_errors = []
    for batch_number, (xb, yb) in enumerate(val_ds, start=1):
        last_step = xb[:, -1:, :]
        persistence = tf.repeat(last_step, repeats=tf.shape(yb)[1], axis=1)  # (B,H,F)
        abs_err = tf.abs(tf.cast(yb, tf.float32) - tf.cast(persistence, tf.float32))
        pooled_errors.append(abs_err.numpy().ravel())
        if batch_number >= max_batches:
            break

    if not pooled_errors:
        return [0.01, 0.02, 0.05, 0.1]

    all_errors = np.concatenate(pooled_errors)
    return list(np.quantile(all_errors, [0.5, 0.75, 0.9, 0.95]))  # p50, p75, p90, p95

def build_delta_grid(val_ds: tf.data.Dataset):
    """Return the sorted, de-duplicated list of Huber deltas to sweep.

    Combines a fixed set of deltas with the persistence-baseline error quantiles
    measured on `val_ds`, clips every candidate to [1e-4, 0.5] and prints the
    resulting grid.
    """
    fixed_deltas = [0.01, 0.02, 0.05, 0.1]
    quantile_deltas = estimate_error_quantiles_persistence(val_ds, max_batches=256)
    # A set removes duplicates, including values that coincide after clipping.
    clipped = {float(np.clip(value, 1e-4, 0.5)) for value in fixed_deltas + quantile_deltas}
    cand = sorted(clipped)
    print("\nGrid δ (calibrado con cuantiles |e| en VAL):", cand)
    return cand

# ---------- Entrenamiento para un δ (callbacks en val_log_cosh) ----------
def _min_finite(values) -> float:
    """Smallest finite entry of `values` (as float32), or +inf when there is none."""
    arr = np.array(values, dtype=np.float32)
    arr = arr[np.isfinite(arr)]
    return float(np.min(arr)) if arr.size > 0 else np.inf


def train_for_delta(train_ds: tf.data.Dataset,
                    val_ds: tf.data.Dataset,
                    delta: float,
                    scenario_tag: str,
                    scaler) -> tuple[Model, float, float, str]:
    """Train one Transformer with a Huber loss of the given `delta`.

    Early stopping and checkpointing both monitor `val_log_cosh`, so runs with
    different deltas are compared on a loss-independent metric.

    Args:
        train_ds: training dataset; also used to infer window/features/horizon.
        val_ds: validation dataset.
        delta: Huber delta.
        scenario_tag: suffix appended to the checkpoint file name.
        scaler: scaler validated by the metric builder.

    Returns:
        `(model, best_val_logcosh, best_val_huber, ckpt_path)`: the trained
        model, the minimum finite `val_log_cosh` and `val_loss` seen during
        training (+inf if none), and the checkpoint path.
    """
    w, f, h = infer_shapes_from_dataset(train_ds)
    model = build_transformer_point_model(w, f, h)
    metrics = build_within_metrics_minmax(scaler, EPS_LIST, n_features=f)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4, clipnorm=1.0),
        loss=make_huber_loss(delta),
        metrics=metrics
    )

    ckpt_path = os.path.join(
        MODEL_DIR, f"transformer_huber_w{w}_h{h}_delta{_eps_tag(delta)}{scenario_tag}.keras"
    )

    callbacks = [
        tf.keras.callbacks.TerminateOnNaN(),
        EarlyStopping(monitor="val_log_cosh", mode="min", patience=15, restore_best_weights=True),
        ModelCheckpoint(ckpt_path, monitor="val_log_cosh", mode="min", save_best_only=True)
    ]

    hist = model.fit(train_ds, validation_data=val_ds, epochs=160, callbacks=callbacks, verbose=1)

    # Non-finite epochs (e.g. a NaN that stopped training) are ignored.
    best_val_logcosh = _min_finite(hist.history.get("val_log_cosh", []))
    best_val_huber = _min_finite(hist.history.get("val_loss", []))

    return model, best_val_logcosh, best_val_huber, ckpt_path

# ---------- Barrido de deltas (selección por val_log_cosh) ----------
def sweep_deltas(train_ds, val_ds, scenario_tag: str, scaler) -> tuple[Model, float, str]:
    """Train one model per delta in the grid and keep the best by `val_log_cosh`.

    Prints a per-delta summary table. The winning model is reloaded from its
    checkpoint with `compile=False`.

    Returns:
        `(best_model, best_delta, best_ckpt)`; all three are None when no run
        reached a `val_log_cosh` below +inf.
    """
    deltas = build_delta_grid(val_ds)
    print(f"\n=== Barrido Huber delta {scenario_tag} (Transformer) ===")

    runs = []  # (delta, min val_log_cosh, min val_huber, checkpoint path)
    best_score = np.inf
    best_ckpt = None
    best_delta = None
    best_model = None

    for delta in deltas:
        print(f"\n--- Entrenando δ={delta} ---")
        model, val_logcosh, val_huber, ckpt_path = train_for_delta(
            train_ds, val_ds, delta, scenario_tag, scaler
        )
        print(f"  -> min val_log_cosh={val_logcosh:.12f} | min val_huber={val_huber:.12f}")
        runs.append((delta, val_logcosh, val_huber, ckpt_path))
        # Selection uses the metric shared by all deltas, not the Huber loss itself.
        if val_logcosh < best_score:
            best_score = val_logcosh
            best_ckpt = ckpt_path
            best_delta = delta
            best_model = model

    # Reload the best checkpoint without compiling (ready for serving)
    if best_ckpt:
        best_model = tf.keras.models.load_model(best_ckpt, compile=False)

    # Table sorted by val_log_cosh for inspection
    print("\nTabla val_log_cosh(min) por δ (orden asc):")
    for delta, v_log, v_hub, _ in sorted(runs, key=lambda run: run[1]):
        print(f"  δ={delta:>8g}: val_log_cosh={v_log:.12f} | val_huber={v_hub:.12f}")
    print(f"\n>>> Mejor δ por val_log_cosh: {best_delta} (val_log_cosh={best_score:.12f})")
    return best_model, best_delta, best_ckpt

# ---------- Evaluación en TEST ----------
def evaluate_on_test(model: tf.keras.Model, ds: tf.data.Dataset, best_delta: float, scaler):
    """Recompile `model` with the evaluation metrics and print its results on `ds`.

    Prints the Huber loss, log-cosh, every within-eps hit rate and the AUTC.
    Metric values are read with the suffix-tolerant lookup that the AUTC
    computation also uses, so the printed rows and the AUTC agree even when a
    metric is reported under a suffixed name.

    Does nothing except print a warning when `model` or `best_delta` is missing
    or `best_delta` is not finite.
    """
    if (model is None) or (best_delta is None) or (not np.isfinite(best_delta)):
        print("  [AVISO] No se pudo entrenar un modelo válido.")
        return
    _, f, _ = infer_shapes_from_dataset(ds)
    metrics = build_within_metrics_minmax(scaler, EPS_LIST, n_features=f)
    # The model may have been loaded with compile=False; compile it for evaluate().
    model.compile(optimizer="adam", loss=make_huber_loss(best_delta), metrics=metrics)

    res = model.evaluate(ds, return_dict=True, verbose=0)
    print("  loss (Huber):              {:.6f}".format(res.get("loss", float("nan"))))
    print("  log_cosh:                  {:.6f}".format(_get_metric_value_relaxed(res, "log_cosh")))
    for e in EPS_LIST:
        key = f"within_eps_{_eps_tag(e)}"
        print(f"  {key:26s}: {_get_metric_value_relaxed(res, key):.6f}")
    autc = compute_autc_from_results(res, EPS_LIST)
    eps_min, eps_max = float(min(EPS_LIST)), float(max(EPS_LIST))
    print(f"  AUTC[{eps_min:.3f}–{eps_max:.3f}]:         {autc:.6f}")

# ================== RUN: THREE SCENARIOS ==================
# Uses the datasets built earlier (val/test are not shuffled):
#   train_dss/val_dss/test_dss, train_dsm/val_dsm/test_dsm, train_dsl/val_dsl/test_dsl
_scenarios = (
    ("S", train_dss, val_dss, test_dss),  # 20 -> 1
    ("M", train_dsm, val_dsm, test_dsm),  # 60 -> 5
    ("L", train_dsl, val_dsl, test_dsl),  # 120 -> 20
)

# For each scenario: sweep the deltas, then evaluate the selected model on TEST.
_scenario_results = {}
for _label, _train_ds, _val_ds, _test_ds in _scenarios:
    _scenario_results[_label] = sweep_deltas(_train_ds, _val_ds, scenario_tag=f"_{_label}", scaler=scaler)
    print(f"\nResultados TEST - Escenario {_label} (Transformer)")
    evaluate_on_test(
        _scenario_results[_label][0], _test_ds,
        best_delta=_scenario_results[_label][1], scaler=scaler,
    )

# Expose the per-scenario results under their usual names.
model_s, delta_s, path_s = _scenario_results["S"]
model_m, delta_m, path_m = _scenario_results["M"]
model_l, delta_l, path_l = _scenario_results["L"]

print("\n=== Mejor δ por escenario (Transformer, métrica común: val_log_cosh) ===")
for _label in ("S", "M", "L"):
    print(f"  {_label}: {_scenario_results[_label][1]}  -> {_scenario_results[_label][2]}")


2025-09-19 08:49:23.148063: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence



Grid δ (calibrado con cuantiles |e| en VAL): [0.00853496789932251, 0.01, 0.017357412725687027, 0.02, 0.03049677610397339, 0.04116208553314208, 0.05, 0.1]

=== Barrido Huber delta _S (Transformer) ===

--- Entrenando δ=0.00853496789932251 ---
Epoch 1/160


2025-09-19 08:49:24.924887: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 92ms/step - log_cosh: 0.2003 - loss: 0.0045 - within_eps_0_005: 0.0066 - within_eps_0_01: 0.0131 - within_eps_0_02: 0.0249 - within_eps_0_05: 0.0620 - within_eps_0_1: 0.1275 - val_log_cosh: 0.0939 - val_loss: 0.0029 - val_within_eps_0_005: 0.0101 - val_within_eps_0_01: 0.0213 - val_within_eps_0_02: 0.0436 - val_within_eps_0_05: 0.1102 - val_within_eps_0_1: 0.2213
Epoch 2/160
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 78ms/step - log_cosh: 0.1042 - loss: 0.0032 - within_eps_0_005: 0.0083 - within_eps_0_01: 0.0171 - within_eps_0_02: 0.0339 - within_eps_0_05: 0.0825 - within_eps_0_1: 0.1650 - val_log_cosh: 0.0778 - val_loss: 0.0025 - val_within_eps_0_005: 0.0124 - val_within_eps_0_01: 0.0245 - val_within_eps_0_02: 0.0498 - val_within_eps_0_05: 0.1196 - val_within_eps_0_1: 0.2187
Epoch 3/160
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 77ms/step - log_cosh: 0.0978 - loss: 0.0031 -

2025-09-19 09:28:34.986918: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 94ms/step - log_cosh: 0.1716 - loss: 0.0049 - within_eps_0_005: 0.0070 - within_eps_0_01: 0.0140 - within_eps_0_02: 0.0278 - within_eps_0_05: 0.0694 - within_eps_0_1: 0.1394 - val_log_cosh: 0.1224 - val_loss: 0.0039 - val_within_eps_0_005: 0.0089 - val_within_eps_0_01: 0.0177 - val_within_eps_0_02: 0.0368 - val_within_eps_0_05: 0.0914 - val_within_eps_0_1: 0.1758
Epoch 2/160
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 65ms/step - log_cosh: 0.0915 - loss: 0.0035 - within_eps_0_005: 0.0090 - within_eps_0_01: 0.0178 - within_eps_0_02: 0.0359 - within_eps_0_05: 0.0895 - within_eps_0_1: 0.1785 - val_log_cosh: 0.0822 - val_loss: 0.0030 - val_within_eps_0_005: 0.0130 - val_within_eps_0_01: 0.0260 - val_within_eps_0_02: 0.0502 - val_within_eps_0_05: 0.1233 - val_within_eps_0_1: 0.2320
Epoch 3/160
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 66ms/step - log_cosh: 0.0881 - loss: 0.0034 -

2025-09-19 09:55:09.976337: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 117ms/step - log_cosh: 0.1198 - loss: 0.0039 - within_eps_0_005: 0.0091 - within_eps_0_01: 0.0180 - within_eps_0_02: 0.0360 - within_eps_0_05: 0.0896 - within_eps_0_1: 0.1773 - val_log_cosh: 0.1083 - val_loss: 0.0035 - val_within_eps_0_005: 0.0100 - val_within_eps_0_01: 0.0203 - val_within_eps_0_02: 0.0405 - val_within_eps_0_05: 0.1015 - val_within_eps_0_1: 0.2062
Epoch 2/160
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 74ms/step - log_cosh: 0.0524 - loss: 0.0026 - within_eps_0_005: 0.0121 - within_eps_0_01: 0.0242 - within_eps_0_02: 0.0484 - within_eps_0_05: 0.1206 - within_eps_0_1: 0.2387 - val_log_cosh: 0.0741 - val_loss: 0.0028 - val_within_eps_0_005: 0.0129 - val_within_eps_0_01: 0.0263 - val_within_eps_0_02: 0.0527 - val_within_eps_0_05: 0.1340 - val_within_eps_0_1: 0.2610
Epoch 3/160
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 74ms/step - log_cosh: 0.0487 - loss: 0.0025 