In [None]:
import os
import json
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense

# ───────────────────────────────────────────────────────────────
# CONFIGURATION
# ───────────────────────────────────────────────────────────────

# Folder holding the preprocessed CSV files, and folder holding the
# per-dataset training artefacts (weights + hyper-parameter JSON).
BASE_PATH    = "Conjunt de dades Preprocessades/Datasets"
RESULTS_PATH = "results_LSTM"

# One CSV per market/ticker; every file shares the same name suffix.
DATASETS = [
    f"{market}_Stock_Price_output.csv"
    for market in (
        "Amazon",
        "Euro_Stoxx_50",
        "Google",
        "Hang_Seng",
        "IBEX_35",
        "Indra",
        "P&G",
        "S&P500",
    )
]

# Number of past rows fed to the network for each prediction.
N_STEPS = 30

# Model inputs, in the column order used when building sequences.
FEATURE_COLUMNS = [
    # raw price/volume columns
    "Open", "High", "Low", "Volume",
    # moving averages
    "EMA_7", "EMA_40",
    # MACD family
    "MACD", "Signal_Line", "MACD_Hist",
    # oscillator / range indicators
    "RSI", "ATR",
]

# Column the model predicts.
TARGET_COLUMN = "Close"

# NOTE(review): the main loop below reads TEST_RATIO, which is not defined
# anywhere in this cell. It must be defined (with the same fraction used when
# the weights were trained) before the loop runs — TODO confirm the value.


# ───────────────────────────────────────────────────────────────
# FUNCIONES AUXILIARES
# ───────────────────────────────────────────────────────────────

def create_sequences(X, y, n_steps=30):
    """
    Build sliding windows over X paired with the target that follows each one.

    For every index i in [n_steps, len(X)), the window X[i - n_steps:i] is
    paired with y[i]; the row at index i itself is not part of its window.

    Returns a tuple (windows, targets) of NumPy arrays. When len(X) <= n_steps
    both arrays are empty.
    """
    target_positions = range(n_steps, len(X))
    windows = [X[end - n_steps:end] for end in target_positions]
    targets = [y[end] for end in target_positions]
    return np.array(windows), np.array(targets)

def build_lstm_model(sequence_length, n_features, units, n_layers, dropout, learning_rate):
    """
    Rebuild the LSTM architecture from the given hyper-parameters.

    The network is a Sequential stack of `n_layers` LSTM layers with `units`
    units each, every one followed by a Dropout(`dropout`) layer, and a final
    Dense(1) layer with linear activation. Only the last LSTM layer collapses
    the time dimension (return_sequences=False); the first one declares the
    input shape (sequence_length, n_features).

    The model is compiled with Huber loss, an Adam optimizer using
    `learning_rate`, and mean absolute error as the tracked metric.

    Returns the compiled model.
    """
    model = Sequential()
    final_index = n_layers - 1

    for layer_index in range(n_layers):
        # Intermediate layers must emit the full sequence for the next LSTM.
        lstm_kwargs = {"return_sequences": layer_index != final_index}
        if layer_index == 0:
            lstm_kwargs["input_shape"] = (sequence_length, n_features)
        model.add(LSTM(units, **lstm_kwargs))
        model.add(Dropout(dropout))

    model.add(Dense(1, activation="linear"))
    model.compile(
        loss="huber",
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=["mean_absolute_error"],
    )
    return model

def compute_metrics(model, X_scaled, y_scaled, scaler_y):
    """
    Evaluate the model on scaled data and report errors in original units.

    Predictions and targets are both mapped back through
    `scaler_y.inverse_transform` before the metrics are computed.

    Returns a tuple (mae, rmse, r2, y_true, y_pred), where y_true and y_pred
    are the inverse-transformed targets and predictions.
    """
    to_original_units = scaler_y.inverse_transform

    y_pred = to_original_units(model.predict(X_scaled, verbose=0))
    y_true = to_original_units(y_scaled)

    return (
        mean_absolute_error(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
        r2_score(y_true, y_pred),
        y_true,
        y_pred,
    )

def recompute_indicators(df):
    """
    Recompute EMA_7, EMA_40, MACD, Signal_Line, MACD_Hist, RSI and ATR in place.

    All indicators are derived from the 'Close', 'High' and 'Low' columns of
    `df` and written back as columns of the same frame; nothing is returned.

    - EMA_n: exponential moving average of Close with span n (adjust=False).
    - MACD: EMA_12 - EMA_26 of Close; Signal_Line: span-9 EMA of MACD;
      MACD_Hist: MACD - Signal_Line.
    - RSI: built from exponentially smoothed gains/losses (alpha=1/14), with
      1e-8 added to the denominator to avoid division by zero.
    - ATR: 14-row rolling mean of (High - Low); the first 13 rows are NaN.
    """
    close = df['Close']

    def _ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    def _smooth(series):
        return series.ewm(alpha=1/14, adjust=False).mean()

    # Moving averages of the close.
    df['EMA_7'] = _ema(close, 7)
    df['EMA_40'] = _ema(close, 40)

    # MACD line, its signal line and the histogram between them.
    macd_line = _ema(close, 12) - _ema(close, 26)
    signal_line = _ema(macd_line, 9)
    df['MACD'] = macd_line
    df['Signal_Line'] = signal_line
    df['MACD_Hist'] = macd_line - signal_line

    # RSI from smoothed upward and downward close-to-close moves.
    change = close.diff()
    upward = change.clip(lower=0)
    downward = -change.clip(upper=0)
    relative_strength = _smooth(upward) / (_smooth(downward) + 1e-8)
    df['RSI'] = 100 - (100 / (1 + relative_strength))

    # Range-based volatility measure.
    bar_range = df['High'] - df['Low']
    df['ATR'] = bar_range.rolling(window=14).mean()


# ───────────────────────────────────────────────────────────────
# MAIN LOOP: LOAD EACH DATASET, RESTORE WEIGHTS, EVALUATE AND PLOT
# ───────────────────────────────────────────────────────────────
#
# For every CSV in DATASETS this loop:
#   1. loads the data and rebuilds the sliding-window sequences,
#   2. re-fits the MinMax scalers on the non-test portion,
#   3. rebuilds the LSTM from the saved hyper-parameters and loads its weights,
#   4. reports MAE / RMSE / R² on the test split and saves a real-vs-predicted plot,
#   5. rolls the model forward 10 business days autoregressively and saves the
#      forecast as CSV and as a plot.
# Datasets without saved weights or hyper-parameters are skipped.

for filename in DATASETS:
    dataset_name = os.path.splitext(filename)[0]
    print(f"\n===== Procesando: {dataset_name} =====")

    # Input CSV and the artefacts previously saved for this dataset.
    data_path    = os.path.join(BASE_PATH, filename)
    model_folder = os.path.join(RESULTS_PATH, dataset_name)
    weights_path = os.path.join(model_folder, f"{dataset_name}_best_weights.weights.h5")
    params_path  = os.path.join(model_folder, f"{dataset_name}_best_params.json")

    print(weights_path)
    print(params_path)

    if not os.path.isfile(weights_path) or not os.path.isfile(params_path):
        print(f"  ⚠️ Faltan pesos o parámetros para {dataset_name}, omitiendo.")
        continue

    # 1) Load the CSV, order it chronologically and drop incomplete rows.
    df = pd.read_csv(data_path)
    df['Date'] = pd.to_datetime(df['Date'])
    df.sort_values("Date", inplace=True)
    df = df.dropna(subset=FEATURE_COLUMNS + [TARGET_COLUMN]).reset_index(drop=True)

    # 2) Build sequences and keep the trailing fraction as the test split.
    data_X = df[FEATURE_COLUMNS].values
    data_y = df[TARGET_COLUMN].values.reshape(-1, 1)
    X_seq, y_seq = create_sequences(data_X, data_y, n_steps=N_STEPS)

    n_total = len(X_seq)
    # NOTE(review): TEST_RATIO is not defined in this cell; it has to exist in
    # the session already and should match the split used during training.
    test_start = int(n_total * (1 - TEST_RATIO))
    X_test = X_seq[test_start:]
    y_test = y_seq[test_start:]

    # 3) Re-fit the scalers on everything before the test split (train + val),
    #    so the test data never influences the scaling ranges.
    split_point = test_start  # number of train+val sequences
    X_train_val = X_seq[:split_point]
    y_train_val = y_seq[:split_point]

    scaler_X = MinMaxScaler()
    flat_Xtv = X_train_val.reshape(-1, len(FEATURE_COLUMNS))
    scaler_X.fit(flat_Xtv)

    scaler_y = MinMaxScaler()
    scaler_y.fit(y_train_val)

    def scale_X(X):
        """Scale a feature array of any leading shape, keeping that shape."""
        flat = X.reshape(-1, len(FEATURE_COLUMNS))
        flat_scaled = scaler_X.transform(flat)
        return flat_scaled.reshape(X.shape)

    X_test_scaled = scale_X(X_test)
    y_test_scaled = scaler_y.transform(y_test)

    # 4) Load the saved hyper-parameters.
    with open(params_path, 'r') as f:
        best_params = json.load(f)
    n_layers      = best_params['n_layers']
    units         = best_params['units']
    dropout       = best_params['dropout']
    learning_rate = best_params['learning_rate']

    print(f"  ✓ Parámetros cargados: layers={n_layers}, units={units}, dropout={dropout}, lr={learning_rate}")

    # 5) Rebuild the model and load its weights.
    model = build_lstm_model(
        sequence_length=N_STEPS,
        n_features=len(FEATURE_COLUMNS),
        units=units,
        n_layers=n_layers,
        dropout=dropout,
        learning_rate=learning_rate
    )
    model.load_weights(weights_path)
    print(f"  ✓ Pesos cargados desde: {weights_path}")

    # 6) Metrics on the test split, in original price units.
    mae_test, rmse_test, r2_test, y_true, y_pred = compute_metrics(
        model, X_test_scaled, y_test_scaled, scaler_y
    )
    print(f"  → Test MAE={mae_test:.4f}, RMSE={rmse_test:.4f}, R²={r2_test:.4f}")

    # 7) Plot real vs predicted on the test split. Sequence targets map to
    #    df rows N_STEPS..end, so the test targets are the last len(y_true) rows.
    dates_test = df['Date'].iloc[-len(y_true):].reset_index(drop=True)
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=dates_test, y=y_true.flatten(),
        mode='lines', name='Real (Close)', line=dict(color='blue')
    ))
    fig.add_trace(go.Scatter(
        x=dates_test, y=y_pred.flatten(),
        mode='lines', name='Predicho', line=dict(color='red', dash='dash')
    ))
    fig.update_layout(
        title=f"{dataset_name} – Real vs Predicción (Test)",
        xaxis_title='Fecha',
        yaxis_title='Precio Close (USD)',
        template='plotly_dark',
        xaxis_rangeslider_visible=True
    )
    plot_html = os.path.join(model_folder, f"{dataset_name}_test_plot.html")
    plot_png  = os.path.join(model_folder, f"{dataset_name}_test_plot.png")
    fig.write_html(plot_html)
    fig.write_image(plot_png)
    print(f"  ✓ Gráfica Test guardada en: {plot_html}, {plot_png}")

    # 8) Autoregressive forecast of the next 10 business days.
    df_future = df.copy().reset_index(drop=True)
    recompute_indicators(df_future)

    # Seed with the last N_STEPS *observed* rows. The last test sequence
    # (X_test_scaled[-1]) stops one row earlier — it is the window whose target
    # is the final known close — so using it would re-predict the last known
    # day and label that value as the first future date.
    seed_features = df[FEATURE_COLUMNS].iloc[-N_STEPS:].values
    input_window = scale_X(seed_features).reshape(1, N_STEPS, len(FEATURE_COLUMNS))

    last_date = df_future['Date'].iloc[-1]
    future_dates = pd.bdate_range(start=last_date + pd.Timedelta(days=1), periods=10)
    future_preds = []

    for future_date in future_dates:
        next_close_scaled = model.predict(input_window, verbose=0)
        next_close = scaler_y.inverse_transform(next_close_scaled)[0, 0]
        future_preds.append(next_close)

        # Synthesize a row for the predicted day: open at the previous close,
        # high/low/close at the prediction, volume carried forward. Indicator
        # columns are placeholders that recompute_indicators fills in.
        prev = df_future.iloc[-1]
        new_row = {
            'Date':        future_date,
            'Open':        prev['Close'],
            'High':        next_close,
            'Low':         next_close,
            'Volume':      prev['Volume'],
            'Close':       next_close,
            'EMA_7':       np.nan,
            'EMA_40':      np.nan,
            'MACD':        np.nan,
            'Signal_Line': np.nan,
            'MACD_Hist':   np.nan,
            'RSI':         np.nan,
            'ATR':         np.nan
        }
        df_future.loc[len(df_future)] = new_row
        recompute_indicators(df_future)

        # Next window: the most recent N_STEPS rows, including the new one.
        last_features = df_future[FEATURE_COLUMNS].iloc[-N_STEPS:].values
        last_features_scaled = scaler_X.transform(last_features)
        input_window = last_features_scaled.reshape(1, N_STEPS, len(FEATURE_COLUMNS))

    # 9) Save the future predictions as CSV.
    df_fut_pred = pd.DataFrame({
        "Date": future_dates,
        "Predicted_Close": future_preds
    })
    fut_csv = os.path.join(model_folder, f"{dataset_name}_future_10days.csv")
    df_fut_pred.to_csv(fut_csv, index=False)
    print(f"  ✓ Predicciones futuras guardadas en: {fut_csv}")

    # 10) Plot the full history together with the future predictions.
    fig_future = go.Figure()
    fig_future.add_trace(go.Scatter(
        x=df['Date'], y=df['Close'],
        mode='lines', name='Histórico Close', line=dict(color='lightblue')
    ))
    fig_future.add_trace(go.Scatter(
        x=future_dates, y=np.array(future_preds),
        mode='lines+markers', name='Predicción futura',
        line=dict(color='orange', dash='dash'),
        marker=dict(size=6)
    ))
    fig_future.update_layout(
        title=f"{dataset_name} – Predicción Próximos 10 Días",
        xaxis_title='Fecha',
        yaxis_title='Precio Close (USD)',
        template='plotly_dark',
        xaxis_rangeslider_visible=True
    )
    fut_html = os.path.join(model_folder, f"{dataset_name}_future_plot.html")
    fut_png  = os.path.join(model_folder, f"{dataset_name}_future_plot.png")
    fig_future.write_html(fut_html)
    fig_future.write_image(fut_png)
    print(f"  ✓ Gráfica futura guardada en: {fut_html}, {fut_png}")



===== Procesando: Amazon_Stock_Price_output =====
False
results_LSTM\Amazon_Stock_Price_output\Amazon_Stock_Price_output_best_params.json
  ⚠️ Faltan pesos o parámetros para Amazon_Stock_Price_output, omitiendo.

===== Procesando: Euro_Stoxx_50_Stock_Price_output =====
False
results_LSTM\Euro_Stoxx_50_Stock_Price_output\Euro_Stoxx_50_Stock_Price_output_best_params.json
  ⚠️ Faltan pesos o parámetros para Euro_Stoxx_50_Stock_Price_output, omitiendo.

===== Procesando: Google_Stock_Price_output =====
False
results_LSTM\Google_Stock_Price_output\Google_Stock_Price_output_best_params.json
  ⚠️ Faltan pesos o parámetros para Google_Stock_Price_output, omitiendo.

===== Procesando: Hang_Seng_Stock_Price_output =====
False
results_LSTM\Hang_Seng_Stock_Price_output\Hang_Seng_Stock_Price_output_best_params.json
  ⚠️ Faltan pesos o parámetros para Hang_Seng_Stock_Price_output, omitiendo.

===== Procesando: IBEX_35_Stock_Price_output =====
False
results_LSTM\IBEX_35_Stock_Price_output\IBEX_35_St