# 📝 Celda 1 — Parámetros y utilidades

Explicación: fijamos el rango de evaluación (noviembre), la fecha de corte (entrenamos hasta 31-oct) y funciones para indicadores (mismas que Cuaderno 2, resumidas aquí).

In [None]:
# === WORKING MODE ===
# True  -> Option A: load the .h5 models saved by Notebook 5 (no training)
# False -> Option B: no .h5 available; re-train once (final fit) on data <= 31-Oct
LOAD_SAVED_MODELS = True

# Short label -> symbol passed to yf.download; the label is also used as the
# column suffix for every feature of that ticker.
TICKERS = {"BBVA": "BBVA.MC", "SAN": "SAN.MC"}

# Evaluation dates (adjust them if a shorter range is wanted)
CUTOFF     = "2025-10-31"   # train / fit up to here (inclusive)
EVAL_START = "2025-11-01"   # evaluate from here onwards
EVAL_END   = "2025-11-10"   # e.g. the "first days of November"

# Paths to the artifacts produced by Notebook 5
from pathlib import Path
MODELS_DIR = Path("../reports/models")
BEST_CSV   = MODELS_DIR / "best_models_summary.csv"  # generated in Notebook 5


# 📝 Celda 2 — Descargar datos, crear indicadores y dataset de features

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf

def add_indicators(df: pd.DataFrame, prefix: str):
    """Return a copy of ``df`` with technical-indicator columns appended.

    Reads the ``Close``, ``High`` and ``Low`` columns and adds, in this order,
    ``SMA5``, ``SMA10``, ``SMA20``, ``EMA10``, ``STD10``, ``RSI14`` and
    ``ATR14``, each suffixed with ``_{prefix}``. The input frame is not
    modified. Rows without enough history hold NaN.
    """
    close = df["Close"]
    high = df["High"]
    low = df["Low"]
    enriched = df.copy()

    # Simple moving averages of the close over 5, 10 and 20 rows.
    for span in (5, 10, 20):
        enriched[f"SMA{span}_{prefix}"] = close.rolling(span).mean()

    # Exponential moving average and rolling standard deviation (10 rows).
    enriched[f"EMA10_{prefix}"] = close.ewm(span=10, adjust=False).mean()
    enriched[f"STD10_{prefix}"] = close.rolling(10).std()

    # RSI(14): exponentially smoothed gains vs. losses with alpha = 1/14.
    change = close.diff()
    gains = change.clip(lower=0)
    losses = (-change).clip(lower=0)
    avg_gain = gains.ewm(alpha=1/14, adjust=False).mean()
    avg_loss = losses.ewm(alpha=1/14, adjust=False).mean()
    # A zero average loss becomes NaN so the ratio is NaN rather than inf.
    rel_strength = avg_gain / avg_loss.replace(0, np.nan)
    enriched[f"RSI14_{prefix}"] = 100 - 100/(1+rel_strength)

    # ATR(14): 14-row simple mean of the true range.
    prev_close = close.shift()
    candidates = [high - low, (high - prev_close).abs(), (low - prev_close).abs()]
    true_range = pd.concat(candidates, axis=1).max(axis=1)
    enriched[f"ATR14_{prefix}"] = true_range.rolling(14).mean()

    return enriched

def tidy(df: pd.DataFrame, prefix: str):
    """Rename the OHLCV columns with a ``_{prefix}`` suffix and keep a fixed column set.

    The result holds, in order: Close, Volume, Open, High, Low, AdjClose and
    the seven indicator columns (SMA5, SMA10, SMA20, EMA10, STD10, RSI14,
    ATR14), all suffixed with ``_{prefix}``. Any other column is dropped.
    """
    # "Adj Close" loses its space; every other name just gains the suffix.
    renamed = {
        raw: f"{raw.replace(' ', '')}_{prefix}"
        for raw in ("Close", "Volume", "Open", "High", "Low", "Adj Close")
    }
    indicator_cols = [
        f"{name}_{prefix}"
        for name in ("SMA5", "SMA10", "SMA20", "EMA10", "STD10", "RSI14", "ATR14")
    ]
    ordered = list(renamed.values()) + indicator_cols
    return df.rename(columns=renamed)[ordered]

# yfinance treats `end` as exclusive, while the evaluation mask later treats
# EVAL_END as inclusive; ask for one extra calendar day so the EVAL_END
# session itself is downloaded.
download_end = (pd.to_datetime(EVAL_END) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")

# Per-ticker feature frames, keyed by the short ticker label.
raw = {}
for k, yft in TICKERS.items():
    d = yf.download(yft, start="2000-01-01", end=download_end, auto_adjust=False, actions=True, progress=False)
    # Recent yfinance releases may return (field, ticker) MultiIndex columns
    # even for a single symbol; keep only the field level so that
    # d["Close"] is a Series, as add_indicators expects.
    if isinstance(d.columns, pd.MultiIndex):
        d.columns = d.columns.get_level_values(0)
    # Drop the warm-up rows where the rolling indicators are still NaN.
    d = add_indicators(d, k).dropna()
    raw[k] = tidy(d, k)

# Keep only the dates present for both tickers.
data = raw["BBVA"].join(raw["SAN"], how="inner").dropna()
print("Rango:", data.index.min().date(), "‚Üí", data.index.max().date(), "| shape:", data.shape)
data.tail(3)


# 📝 Celda 3 — Split por fecha y escalado sin fuga (fit en train)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Boolean masks over the date index: training rows are everything up to and
# including CUTOFF; evaluation rows fall inside [EVAL_START, EVAL_END].
train_mask = data.index <= pd.to_datetime(CUTOFF)
eval_mask  = (data.index >= pd.to_datetime(EVAL_START)) & (data.index <= pd.to_datetime(EVAL_END))

# Independent copies so later edits do not write back into `data`.
data_train = data.loc[train_mask].copy()
data_eval  = data.loc[eval_mask].copy()

def fit_apply_scalers_per_ticker(df_train, df_all, ticker):
    """Min-max scale one ticker's columns, fitting on the training rows only.

    Selects the columns of ``df_all`` whose name ends in ``_{ticker}``, fits a
    ``MinMaxScaler`` on those columns of ``df_train`` and applies it to the
    same columns of a copy of ``df_all``.

    Returns a tuple ``(scaled_copy, fitted_scaler, selected_columns)``.
    """
    suffix = f"_{ticker}"
    ticker_cols = [name for name in df_all.columns if name.endswith(suffix)]

    # Fit on the training slice only so later rows cannot leak into the ranges.
    scaler = MinMaxScaler()
    scaler.fit(df_train[ticker_cols])

    result = df_all.copy()
    result[ticker_cols] = scaler.transform(df_all[ticker_cols])
    return result, scaler, ticker_cols

# Scale each ticker's columns in turn. The second call starts from the frame
# already scaled for BBVA, so `scaled` ends up with both tickers transformed;
# each scaler is fitted on the unscaled training rows.
scaled, sc_bbva, bbva_cols = fit_apply_scalers_per_ticker(data_train, data, "BBVA")
scaled, sc_san,  san_cols  = fit_apply_scalers_per_ticker(data_train, scaled, "SAN")

# Re-apply the date masks to the scaled frame.
scaled_train = scaled.loc[train_mask].copy()
scaled_eval  = scaled.loc[eval_mask].copy()

print("Train:", scaled_train.shape, "| Eval:", scaled_eval.shape)


# 📝 Celda 4 — Utilidades para ventanas y baseline de persistencia

In [None]:
import numpy as np
import pandas as pd

def make_window_from_end(df_scaled_all: pd.DataFrame, end_date: pd.Timestamp, cols_order: list, W: int):
    """Extract the ``W`` consecutive rows that end at ``end_date`` (inclusive).

    If ``end_date`` is not in the index, the latest index entry strictly
    before it is used instead.

    Returns ``(X, idx)`` where ``X`` is a float32 array of shape
    ``(W, len(cols_order))`` and ``idx`` is the matching index slice, or
    ``None`` when there is no earlier date or fewer than ``W`` rows exist up
    to the end position.
    """
    index = df_scaled_all.index

    if end_date not in index:
        # Fall back to the most recent available date before end_date.
        earlier = index[index < end_date]
        if len(earlier) == 0:
            return None
        end_date = earlier.max()

    last = index.get_indexer_for([end_date])[0]
    first = last - (W - 1)
    if first < 0:
        # Not enough history for a full window.
        return None

    window_idx = index[first:last + 1]
    window = df_scaled_all.loc[window_idx, cols_order].values.astype(np.float32)
    return window, window_idx

def naive_persistence_from_window(X_window: np.ndarray):
    """Persistence baseline: return the first feature of the window's last row.

    The caller is expected to order the columns so that the ticker's Close is
    the first feature.
    """
    last_step = X_window[-1]
    return float(last_step[0])


# 📝 Celda 5 — Cargar modelos (Opción A) o Final Fit (Opción B)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers, regularizers
import pandas as pd

def build_model(model_type: str, units: int, n_features: int, window_size: int, lr: float):
    """Build and compile a two-layer recurrent regressor.

    Args:
        model_type: one of ``"SimpleRNN"``, ``"LSTM"`` or ``"GRU"``.
        units: number of units in each of the two recurrent layers.
        n_features: number of features per time step.
        window_size: number of time steps per input window.
        lr: learning rate for the Adam optimizer.

    Returns:
        A compiled ``keras.Model`` mapping ``(window_size, n_features)``
        inputs to a single output, trained with MSE loss.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
    """
    supported = ("SimpleRNN", "LSTM", "GRU")
    # Validate up front: otherwise an unknown type would leave `x` unbound and
    # surface as an opaque UnboundLocalError further down.
    if model_type not in supported:
        raise ValueError(f"Unsupported model_type {model_type!r}; expected one of {supported}")

    inp = keras.Input(shape=(window_size, n_features))
    if model_type == "SimpleRNN":
        x = layers.SimpleRNN(units, return_sequences=True, dropout=0.1, recurrent_dropout=0.1,
                             kernel_regularizer=regularizers.l2(1e-5))(inp)
        x = layers.SimpleRNN(units, dropout=0.1, recurrent_dropout=0.1)(x)
    elif model_type == "LSTM":
        x = layers.LSTM(units, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(inp)
        x = layers.LSTM(units, dropout=0.2, recurrent_dropout=0.2)(x)
    else:  # "GRU"
        x = layers.GRU(units, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(inp)
        x = layers.GRU(units, dropout=0.2, recurrent_dropout=0.2)(x)
    out = layers.Dense(1)(x)
    model = keras.Model(inp, out)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss="mse")
    return model

# Winning hyper-parameters exported by Notebook 5; each row is read below
# through its ticker, model, window, units, batch and lr fields.
best = pd.read_csv(BEST_CSV)
MODELOS = {}  # dict: (ticker, model) -> {"model": keras.Model, "W": int, "cols": list[str]}

if LOAD_SAVED_MODELS:
    # === Option A: load the .h5 files saved by Notebook 5 ===
    # NOTE(review): the loaded models are fed features scaled by the scalers
    # fitted in this notebook; this presumes Notebook 5 scaled its training
    # data the same way -- confirm.
    def h5_name(row):
        """Build the .h5 file name that encodes one row's hyper-parameters."""
        return f"{row.ticker}_{row.model}_w{int(row.window)}_u{int(row.units)}_b{int(row.batch)}_lr{float(row.lr)}.h5"

    for r in best.itertuples(index=False):
        path = MODELS_DIR / h5_name(r)
        # The model is only used for predict(), so skip restoring the
        # optimizer/loss: compile=False avoids failures when deserialising
        # the compiled loss of a legacy H5 file.
        model = keras.models.load_model(path, compile=False)
        cols = [c for c in scaled.columns if c.endswith(f"_{r.ticker}")]
        MODELOS[(r.ticker, r.model)] = {"model": model, "W": int(r.window), "cols": cols}
        print("‚úî Cargado", path.name)

else:
    # === Option B: final fit with the winning hyper-parameters (November untouched) ===
    for r in best.itertuples(index=False):
        tkr, m, W = r.ticker, r.model, int(r.window)
        cols = [c for c in scaled.columns if c.endswith(f"_{tkr}")]
        # Build the (t+1) dataset in scaled space from rows <= cutoff only.
        Xfull = scaled_train[cols].copy()
        yfull = scaled_train[f"Close_{tkr}"].shift(-1)  # target: next row's close
        # dropna removes the last row, whose t+1 target does not exist.
        data_xy = Xfull.join(yfull.rename("y")).dropna()
        # Windowing: each sample is the W rows ending at i, labelled with y[i].
        Xv = data_xy[cols].values.astype(np.float32)
        yv = data_xy["y"].values.astype(np.float32)
        xs, ys = [], []
        for i in range(W-1, len(Xv)):
            xs.append(Xv[i-W+1:i+1, :])
            ys.append(yv[i])
        X3 = np.array(xs); y1 = np.array(ys)
        # Chronological 90%/10% internal split, used only for EarlyStopping.
        cut = int(len(X3)*0.9)
        Xtr, ytr, Xva, yva = X3[:cut], y1[:cut], X3[cut:], y1[cut:]
        model = build_model(m, int(r.units), X3.shape[2], W, float(r.lr))
        es = keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
        model.fit(Xtr, ytr, validation_data=(Xva,yva), epochs=30, batch_size=int(r.batch), verbose=0, callbacks=[es])
        MODELOS[(tkr, m)] = {"model": model, "W": W, "cols": cols}
        print(f"‚úÖ Entrenado {tkr} ¬∑ {m} (W={W}) con datos <= {CUTOFF}")


# 📝 Celda 6 — Predicción walk-forward en noviembre y baseline persistencia

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# One-step-ahead walk-forward: for every evaluation date d, predict Close(d)
# from the W rows ending on the previous available date. Values stay in
# scaled space.
rows = []
for (tkr, m), info in MODELOS.items():
    W, cols, model = info["W"], info["cols"], info["model"]
    for d in scaled_eval.index:
        # The window must end on the last available date strictly before d.
        prev_idx = scaled.index[scaled.index < d]
        if len(prev_idx) == 0:
            continue
        end_date = prev_idx.max()
        win = make_window_from_end(scaled, end_date, cols, W)
        if win is None:
            # Not enough history for a full window.
            continue
        Xw, _ = win
        y_pred = float(model.predict(Xw.reshape(1, W, len(cols)), verbose=0).ravel()[0])
        y_true = float(scaled.loc[d, f"Close_{tkr}"])
        y_naiv = naive_persistence_from_window(Xw)
        rows.append({"date": d, "ticker": tkr, "model": m, "y_true": y_true, "y_pred": y_pred, "y_naive": y_naiv})

# Explicit columns keep the frame well-formed (and sortable) even when no
# prediction row was produced, e.g. when the evaluation range has no data.
res = (pd.DataFrame(rows, columns=["date", "ticker", "model", "y_true", "y_pred", "y_naive"])
       .sort_values(["ticker","date"])
       .reset_index(drop=True))

def summarize(df):
    """Aggregate per-day predictions into one metrics row per (ticker, model).

    Args:
        df: frame with columns ``ticker``, ``model``, ``y_true``, ``y_pred``
            and ``y_naive``.

    Returns:
        DataFrame sorted by ticker and ``MSE_nov`` with the model MSE/MAE, the
        naive-baseline MSE, the percentage MSE improvement over the baseline
        and the number of rows. An empty input gives an empty frame with the
        same columns. The improvement is NaN when the naive MSE is zero,
        because the ratio is undefined.
    """
    columns = ["ticker", "model", "MSE_nov", "MAE_nov", "MSE_naive", "Mejora_vs_naive_%", "n_dias"]
    out = []
    for (t, m), g in df.groupby(["ticker","model"]):
        mse = mean_squared_error(g.y_true, g.y_pred)
        mae = mean_absolute_error(g.y_true, g.y_pred)
        mse_n = mean_squared_error(g.y_true, g.y_naive)
        # Guard the division: a perfect naive baseline would divide by zero.
        imp = 100*(1 - mse/mse_n) if mse_n > 0 else float("nan")
        out.append({"ticker":t,"model":m,"MSE_nov":mse,"MAE_nov":mae,"MSE_naive":mse_n,"Mejora_vs_naive_%":imp,"n_dias":len(g)})
    # Explicit columns so sort_values also works when there are no groups.
    return pd.DataFrame(out, columns=columns).sort_values(["ticker","MSE_nov"])

# Per-(ticker, model) metrics over the evaluation rows; the bare name on the
# last line displays the table as the notebook cell output.
summary = summarize(res)
summary


# 📝 Celda 7 — Gráficas “Real vs Predicho vs Naive” (Noviembre)

In [None]:
import matplotlib.pyplot as plt

def plot_nov(df, ticker):
    """Plot actual, predicted and naive series for the ticker's best model.

    The best model is the one with the lowest ``MSE_nov`` for ``ticker`` in
    the module-level ``summary`` table; ``df`` supplies the per-day rows.
    """
    ranked = summary[summary["ticker"]==ticker].sort_values("MSE_nov")
    best_m = ranked.iloc[0]["model"]

    selected = df[(df["ticker"]==ticker) & (df["model"]==best_m)]
    dates = selected["date"]

    plt.figure(figsize=(12,4))
    plt.plot(dates, selected["y_true"], label="Real (Close)", linewidth=1.5)
    plt.plot(dates, selected["y_pred"], label=f"Predicho ({best_m})", linewidth=1.5)
    plt.plot(dates, selected["y_naive"], label="Naive", linestyle="--", alpha=0.7)
    plt.title(f"{ticker} ¬∑ Noviembre ‚Äî Real vs Predicho (mejor: {best_m})")
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()
    plt.show()

# One chart per ticker, each showing that ticker's lowest-MSE model.
plot_nov(res, "BBVA")
plot_nov(res, "SAN")


# 📝 Celda 8 — Guardado de resultados

In [None]:
# Write the per-day detail and the per-model summary as CSV files under OUT,
# creating the directory first if it does not exist.
OUT = Path("../reports/noviembre")
OUT.mkdir(parents=True, exist_ok=True)
res.to_csv(OUT / "detalle_noviembre.csv", index=False)
summary.to_csv(OUT / "resumen_noviembre.csv", index=False)
print("‚úÖ Guardado en", OUT)
summary


In [None]:
# === Exportar predicciones para la app Streamlit ===
import pandas as pd
from pathlib import Path

# Reemplaza estas variables por tus DataFrames finales:
# df_bbva_pred y df_san_pred deben tener: date, y_true (puede ser NaN), y_pred
# Si tus nombres son otros, adapta abajo.

def _standardize(df: pd.DataFrame, tk: str) -> pd.DataFrame:
    """Normalise a prediction frame to the columns date, ticker, y_true, y_pred.

    Column names are lower-cased and stripped. ``date`` is converted to
    ``datetime.date`` values, ``ticker`` is set to ``tk`` and a missing
    ``y_true`` column is filled with ``pd.NA``. Exact duplicate rows are
    dropped. The input frame is not modified.

    Raises:
        ValueError: if ``date`` or ``y_pred`` is missing after normalising
            the column names.
    """
    out = df.copy()
    # Normalise column names.
    out.columns = [c.lower().strip() for c in out.columns]
    # Minimum required columns; raised explicitly because an `assert` would be
    # stripped when Python runs with -O.
    missing = {"date", "y_pred"} - set(out.columns)
    if missing:
        raise ValueError(f"Faltan columnas {missing}")
    if "y_true" not in out.columns:
        out["y_true"] = pd.NA
    out["ticker"] = tk
    out["date"] = pd.to_datetime(out["date"]).dt.date
    out = out[["date", "ticker", "y_true", "y_pred"]].drop_duplicates()
    return out

# NOTE(review): df_bbva_pred and df_san_pred are not defined anywhere in the
# visible notebook; they must be created (date, y_pred and optionally y_true)
# before this cell runs -- confirm where they are meant to come from.
df_bbva_std = _standardize(df_bbva_pred, "BBVA")
df_san_std  = _standardize(df_san_pred,  "SAN")
df_app = pd.concat([df_bbva_std, df_san_std], ignore_index=True).sort_values(["ticker","date"])

# Range of interest: reuse the notebook-wide evaluation bounds instead of a
# second hard-coded copy, so changing EVAL_START / EVAL_END also changes the
# exported range.
df_app = df_app[(df_app["date"] >= pd.to_datetime(EVAL_START).date()) &
                (df_app["date"] <= pd.to_datetime(EVAL_END).date())]

# Path is relative to the current working directory.
out_path = Path("data/app")
out_path.mkdir(parents=True, exist_ok=True)
df_app.to_csv(out_path / "predicciones.csv", index=False, encoding="utf-8")
print(f"[OK] Exportado: {out_path / 'predicciones.csv'}  | Filas={len(df_app)}")


In [None]:
import matplotlib.pyplot as plt

def plot_extended_forecast(df_scaled_all, res_df, ticker, days_hist=15):
    """Plot the close just before the cutoff followed by the evaluation predictions.

    Args:
        df_scaled_all: scaled feature frame indexed by date; its
            ``Close_{ticker}`` column provides the history.
        res_df: per-day results with columns ``date``, ``ticker``, ``model``,
            ``y_true`` and ``y_pred``.
        ticker: ticker label used in the column and row filters.
        days_hist: number of calendar days before ``CUTOFF`` to show as
            history (calendar days, not rows).

    The model shown is the one with the lowest ``MSE_nov`` for ``ticker`` in
    the module-level ``summary`` table.
    """
    cutoff = pd.to_datetime(CUTOFF)

    # === actual history before the cutoff (last N calendar days) ===
    hist_mask = (df_scaled_all.index >= cutoff - pd.Timedelta(days=days_hist)) & \
                (df_scaled_all.index <= cutoff)
    # Name the index explicitly instead of assuming it is called "Date", so
    # the "date" column exists whatever the index was named.
    hist = (df_scaled_all.loc[hist_mask, f"Close_{ticker}"]
            .rename("y_hist")
            .rename_axis("date")
            .reset_index())

    # === predicted stretch (evaluation period) ===
    best_m = (summary[summary["ticker"] == ticker]
              .sort_values("MSE_nov")
              .iloc[0]["model"])
    preds = res_df[(res_df["ticker"] == ticker) & (res_df["model"] == best_m)][
        ["date", "y_pred", "y_true"]
    ]

    # === draw both stretches on one axis ===
    plt.figure(figsize=(12,5))
    plt.plot(hist["date"], hist["y_hist"], color="black", linewidth=2.0, label="Hist√≥rico (octubre)")
    plt.plot(preds["date"], preds["y_true"], color="green", linestyle="-", label="Real noviembre")
    plt.plot(preds["date"], preds["y_pred"], color="orange", linestyle="--", label=f"Predicho ({best_m})")

    # Vertical marker at the training cutoff.
    plt.axvline(cutoff, color="gray", linestyle=":", linewidth=1.5)
    plt.text(cutoff, plt.ylim()[0], "Corte 31-oct", fontsize=9, color="gray", ha="right", va="bottom")

    plt.title(f"{ticker} ‚Äî Hist√≥rico y Predicci√≥n noviembre ({best_m})", fontsize=13)
    plt.xlabel("Fecha"); plt.ylabel("Precio de cierre (escalado)")
    plt.grid(alpha=0.3)
    plt.legend()
    plt.tight_layout()
    plt.show()

# One extended chart per ticker: 15 calendar days of history up to the cutoff
# followed by the evaluation-period predictions.
plot_extended_forecast(scaled, res, "BBVA", days_hist=15)
plot_extended_forecast(scaled, res, "SAN", days_hist=15)
