<h1 style="text-align: center;">RNN MODEL</h1>

<h2>Initialization</h2>

In [53]:
# Libraries
import MetaTrader5 as mt5  # pip install MetaTrader5
import pandas as pd  # pip install pandas
import talib
import numpy as np
from datetime import datetime
import plotly.express as px  # pip install plotly

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [54]:
# start the platform with initialize()
mt5.initialize()

True

In [55]:
# login to Trade Account with login()
# make sure that trade server is enabled in MT5 client terminal

login = 5044174558
password = '-a6bNxSy'
server = 'MetaQuotes-Demo'

mt5.login(login, password, server)

True

In [56]:
# get account info
account_info = mt5.account_info()
print(account_info)

# getting specific account data
login_number = account_info.login
balance = account_info.balance
equity = account_info.equity

print()
print('login: ', login_number)
print('balance: ', balance)
print('equity: ', equity)

AccountInfo(login=5044174558, trade_mode=0, leverage=100, limit_orders=200, margin_so_mode=0, trade_allowed=True, trade_expert=True, margin_mode=2, currency_digits=2, fifo_close=False, balance=1000.0, credit=0.0, profit=0.0, equity=1000.0, margin=0.0, margin_free=1000.0, margin_level=0.0, margin_so_call=50.0, margin_so_so=30.0, margin_initial=0.0, margin_maintenance=0.0, assets=0.0, liabilities=0.0, commission_blocked=0.0, name='Jean-Charles Jacques', server='MetaQuotes-Demo', currency='EUR', company='MetaQuotes Ltd.')

login:  5044174558
balance:  1000.0
equity:  1000.0


<h2>GET MARKET DATA</h2>

In [57]:
symbol = "EURUSD"
info = mt5.symbol_info(symbol)

point = info.point       # ex: 0.00001
digits = info.digits     # ex: 5

pip_value = point * 10   # 1 pip en valeur de prix


In [58]:
# ohlc_data
ohlc_data = pd.DataFrame(mt5.copy_rates_range(symbol, 
                                             mt5.TIMEFRAME_D1, 
                                             datetime(2021, 1, 1), 
                                             datetime.now()))

fig = px.line(ohlc_data, x=ohlc_data['time'], y=ohlc_data['close'])
fig.show()

ohlc_data

Unnamed: 0,time,open,high,low,close,tick_volume,spread,real_volume
0,1609718400,1.22395,1.23098,1.22280,1.22473,85620,0,0
1,1609804800,1.22466,1.23056,1.22432,1.22965,75380,0,0
2,1609891200,1.22968,1.23494,1.22657,1.23252,118259,0,0
3,1609977600,1.23256,1.23441,1.22452,1.22710,84749,0,0
4,1610064000,1.22713,1.22845,1.21932,1.22218,100138,0,0
...,...,...,...,...,...,...,...,...
1293,1766361600,1.17120,1.17692,1.17060,1.17600,42841,0,0
1294,1766448000,1.17600,1.18019,1.17552,1.17942,51711,0,0
1295,1766534400,1.17941,1.18077,1.17726,1.17764,55551,0,0
1296,1766707200,1.17769,1.17967,1.17615,1.17712,69962,0,0


In [59]:
ohlc_data.to_csv('data/eurusd.csv')

<h2>COMMON FUNCTIONS</h2>

In [60]:
def plot_loss(history, title="Model Loss Over Epochs"):
    loss_df = pd.DataFrame({
        "Epoch": range(1, len(history.history["loss"]) + 1),
        "Train Loss": history.history["loss"],
        "Validation Loss": history.history.get("val_loss")
    })

    fig = px.line(
        loss_df,
        x="Epoch",
        y=["Train Loss", "Validation Loss"],
        title=title,
        labels={"value": "Loss", "variable": "Metric"}
    )

    fig.update_layout(
        hovermode="x unified",
        template="plotly_white"
    )

    fig.show()


In [61]:
def plot_prices(
    df,
    y_train_real,
    y_test_real,
    train_pred,
    test_pred,
    window_size,
    title="Train / Test / Predictions"
):
    plot_df = pd.DataFrame(index=df.index)

    plot_df["Real Price"] = df["close"].astype(float)

    # IMPORTANT : np.nan et non None
    plot_df["Train Prediction"] = np.nan
    plot_df["Test Prediction"] = np.nan

    plot_df.iloc[
        window_size : window_size + len(train_pred),
        plot_df.columns.get_loc("Train Prediction")
    ] = train_pred

    plot_df.iloc[
        window_size + len(train_pred) :
        window_size + len(train_pred) + len(test_pred),
        plot_df.columns.get_loc("Test Prediction")
    ] = test_pred

    fig = px.line(
        plot_df,
        x=plot_df.index,
        y=plot_df.columns,
        title=title
    )

    fig.update_layout(
        hovermode="x unified",
        template="plotly_white"
    )

    fig.show()


In [62]:
def get_predictions(model, X_train, X_test, scaler):
    # Prédictions normalisées
    train_pred_scaled = model.predict(X_train)
    test_pred_scaled = model.predict(X_test)

    # Matrices factices pour inverse_transform
    train_dummy = np.zeros((len(train_pred_scaled), scaler.n_features_in_))
    test_dummy = np.zeros((len(test_pred_scaled), scaler.n_features_in_))

    train_dummy[:, 0] = train_pred_scaled[:, 0]
    test_dummy[:, 0] = test_pred_scaled[:, 0]

    # Dé-normalisation
    train_pred = scaler.inverse_transform(train_dummy)[:, 0]
    test_pred = scaler.inverse_transform(test_dummy)[:, 0]

    return train_pred, test_pred


<h2>TRAIN MODEL</h2>

In [63]:
# Charger le dataset
df = pd.read_csv("data/eurusd.csv")
df = df[['close', 'tick_volume']].dropna()

In [64]:
# Normalisation (CRUCIAL pour un RNN)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)

In [65]:
# Création des séquences temporelles
# On crée des fenêtres glissantes, par exemple 60 pas de temps → prédire le suivant.
def create_sequences(data, window_size=60):
    X, y = [], []
    for i in range(window_size, len(data)):
        X.append(data[i-window_size:i])
        y.append(data[i, 0])  # close uniquement
    return np.array(X), np.array(y)

WINDOW_SIZE = 60
X, y = create_sequences(scaled_data, WINDOW_SIZE)


In [66]:
# Split train / test
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


In [67]:
# Modèle RNN (LSTM)
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(1)
])

model.compile(
    optimizer="adam",
    loss="mse"
)

model.summary()



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [44]:
# Entraînement
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop]
)


Epoch 1/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - loss: 0.0572 - val_loss: 0.0046
Epoch 2/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0077 - val_loss: 0.0022
Epoch 3/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0052 - val_loss: 0.0024
Epoch 4/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0052 - val_loss: 0.0027
Epoch 5/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0045 - val_loss: 0.0038
Epoch 6/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0047 - val_loss: 0.0019
Epoch 7/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0044 - val_loss: 0.0021
Epoch 8/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0044 - val_loss: 0.0027
Epoch 9/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━

In [45]:
# Prédictions
train_pred, test_pred = get_predictions(
    model,
    X_train,
    X_test,
    scaler
)

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


In [46]:
# Récupérer les valeurs réelles (y_train / y_test) dé-normalisées
def inverse_y(y, scaler):
    dummy = np.zeros((len(y), scaler.n_features_in_))
    dummy[:, 0] = y
    return scaler.inverse_transform(dummy)[:, 0]

y_train_real = inverse_y(y_train, scaler)
y_test_real = inverse_y(y_test, scaler)

In [47]:
# RESULTATS
plot_prices(
    df=df,
    y_train_real=y_train_real,
    y_test_real=y_test_real,
    train_pred=train_pred,
    test_pred=test_pred,
    window_size=WINDOW_SIZE
)

plot_loss(history)

<h2>TEST VALIDATION</h2>

In [48]:
def regression_metrics(y_true, y_pred, eps=1e-9):
    y_true = np.asarray(y_true).astype(float)
    y_pred = np.asarray(y_pred).astype(float)
    err = y_true - y_pred
    ae = np.abs(err)

    mae = float(np.mean(ae))
    rmse = float(np.sqrt(np.mean(err**2)))
    mape = float(np.mean(ae / (np.abs(y_true) + eps)) * 100.0)
    smape = float(np.mean(2.0 * ae / (np.abs(y_true) + np.abs(y_pred) + eps)) * 100.0)

    # R2
    ss_res = np.sum(err**2)
    ss_tot = np.sum((y_true - np.mean(y_true))**2) + eps
    r2 = float(1 - ss_res / ss_tot)

    return {
        "MAE": mae,
        "RMSE": rmse,
        "MAPE_%": mape,
        "sMAPE_%": smape,
        "R2": r2
    }

def build_error_df(df, y_test_real, test_pred, window_size, train_len):
    # index correspondant à y_test (les y commencent à window_size)
    start = window_size + train_len
    idx = df.index[start:start + len(y_test_real)]

    out = pd.DataFrame({
        "time": idx,
        "real": y_test_real,
        "pred": test_pred
    }).set_index("time")

    out["error"] = out["real"] - out["pred"]
    out["abs_error"] = out["error"].abs()
    out["abs_error_pct"] = out["abs_error"] / out["real"].abs().replace(0, np.nan) * 100.0

    # direction (par rapport au prix précédent réel)
    out["prev_real"] = df["close"].loc[idx].shift(1)
    out["real_dir"] = np.sign(out["real"] - out["prev_real"])
    out["pred_dir"] = np.sign(out["pred"] - out["prev_real"])
    out["direction_ok"] = (out["real_dir"] == out["pred_dir"]).astype(int)

    return out.dropna()

In [49]:
def plot_error_diagnostics(err_df, rolling_window=20, title_prefix="Test"):
    # AE dans le temps
    fig1 = px.line(
        err_df,
        x=err_df.index,
        y="abs_error",
        title=f"{title_prefix} – Absolute Error over time"
    )
    fig1.update_layout(hovermode="x unified", template="plotly_white")
    fig1.show()

    # Histogramme AE
    fig2 = px.histogram(
        err_df,
        x="abs_error",
        nbins=50,
        title=f"{title_prefix} – Absolute Error distribution"
    )
    fig2.update_layout(template="plotly_white")
    fig2.show()

    # Rolling MAE
    roll = err_df["abs_error"].rolling(rolling_window).mean()
    fig3 = px.line(
        x=err_df.index,
        y=roll,
        title=f"{title_prefix} – Rolling MAE ({rolling_window})"
    )
    fig3.update_layout(hovermode="x unified", template="plotly_white")
    fig3.show()


In [50]:
def direction_and_quantiles(err_df):
    hit_rate = float(err_df["direction_ok"].mean() * 100.0)
    q = err_df["abs_error"].quantile([0.5, 0.9, 0.95, 0.99]).to_dict()
    return hit_rate, q


In [25]:
metrics = regression_metrics(y_test_real, test_pred)
print("Regression metrics:", metrics)

err_df = build_error_df(
    df=df,
    y_test_real=y_test_real,
    test_pred=test_pred,
    window_size=WINDOW_SIZE,
    train_len=len(train_pred)
)

hit_rate, quantiles = direction_and_quantiles(err_df)
print(f"Directional accuracy (test): {hit_rate:.2f}%")
print("Abs error quantiles:", quantiles)

plot_error_diagnostics(err_df, rolling_window=20, title_prefix="EURUSD Test")


Regression metrics: {'MAE': 0.008242218536792254, 'RMSE': 0.011167255967600011, 'MAPE_%': 0.7313987849876286, 'sMAPE_%': 0.7331670970456804, 'R2': 0.9400432128260613}
Directional accuracy (test): 54.25%
Abs error quantiles: {0.5: 0.005840588111877265, 0.9: 0.016308532814025955, 0.95: 0.02703264475250259, 0.99: 0.03522511696166973}


In [70]:
def build_error_df_with_pips(
    df,
    y_test_real,
    test_pred,
    window_size,
    train_len,
    pip_value
):
    start = window_size + train_len
    idx = df.index[start:start + len(y_test_real)]

    out = pd.DataFrame({
        "time": idx,
        "real": y_test_real,
        "pred": test_pred
    }).set_index("time")

    out["error"] = out["real"] - out["pred"]
    out["abs_error"] = out["error"].abs()

    # 🔥 conversion en pips
    out["abs_error_pips"] = out["abs_error"] / pip_value

    # direction
    out["prev_real"] = df["close"].loc[idx].shift(1)
    out["real_dir"] = np.sign(out["real"] - out["prev_real"])
    out["pred_dir"] = np.sign(out["pred"] - out["prev_real"])
    out["direction_ok"] = (out["real_dir"] == out["pred_dir"]).astype(int)

    return out.dropna()


In [71]:
def pip_error_stats(err_df):
    stats = {
        "MAE_pips": float(err_df["abs_error_pips"].mean()),
        "Median_pips": float(err_df["abs_error_pips"].median()),
        "P90_pips": float(err_df["abs_error_pips"].quantile(0.90)),
        "P95_pips": float(err_df["abs_error_pips"].quantile(0.95)),
        "Max_pips": float(err_df["abs_error_pips"].max()),
        "Directional_accuracy_%": float(err_df["direction_ok"].mean() * 100)
    }
    return stats


In [68]:
def plot_pip_error(err_df, title="Absolute Error (pips) – Test"):
    fig = px.line(
        err_df,
        x=err_df.index,
        y="abs_error_pips",
        title=title,
        labels={"abs_error_pips": "Error (pips)"}
    )
    fig.update_layout(hovermode="x unified", template="plotly_white")
    fig.show()


In [69]:
def plot_pip_error_distribution(err_df):
    fig = px.histogram(
        err_df,
        x="abs_error_pips",
        nbins=50,
        title="Distribution of absolute error (pips)"
    )
    fig.update_layout(template="plotly_white")
    fig.show()


In [72]:
def plot_rolling_mae_pips(err_df, window=20):
    roll = err_df["abs_error_pips"].rolling(window).mean()
    fig = px.line(
        x=err_df.index,
        y=roll,
        title=f"Rolling MAE ({window} days) – pips",
        labels={"y": "MAE (pips)"}
    )
    fig.update_layout(hovermode="x unified", template="plotly_white")
    fig.show()


In [73]:
err_df = build_error_df_with_pips(
    df=df,
    y_test_real=y_test_real,
    test_pred=test_pred,
    window_size=WINDOW_SIZE,
    train_len=len(train_pred),
    pip_value=pip_value
)

stats = pip_error_stats(err_df)
print("📊 Test error stats (pips):")
for k, v in stats.items():
    print(f"{k}: {v:.2f}")

plot_pip_error(err_df)
plot_pip_error_distribution(err_df)
plot_rolling_mae_pips(err_df, window=20)


📊 Test error stats (pips):
MAE_pips: 81.90
Median_pips: 59.98
P90_pips: 165.60
P95_pips: 268.47
Max_pips: 444.85
Directional_accuracy_%: 54.25


# HORIZON 5 DAYS

In [74]:
WINDOW_SIZE = 60     # 60 jours d'historique
HORIZON = 5          # 5 jours à prédire
N_FEATURES = 2       # close + volume

In [75]:
def create_sequences_multi(data, window, horizon):
    X, y = [], []
    for i in range(window, len(data) - horizon + 1):
        X.append(data[i-window:i])
        y.append(data[i:i+horizon, 0])  # les 5 closes futurs
    return np.array(X), np.array(y)

X, y = create_sequences_multi(
    scaled_data,
    WINDOW_SIZE,
    HORIZON
)

split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [76]:
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(WINDOW_SIZE, N_FEATURES)),
    Dropout(0.2),
    LSTM(32),
    Dense(HORIZON)
])

model.compile(
    optimizer="adam",
    loss="mse"
)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=60,
    batch_size=32,
    callbacks=[EarlyStopping(patience=7, restore_best_weights=True)],
    verbose=1
)


Epoch 1/60



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 0.0859 - val_loss: 0.0109
Epoch 2/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0062 - val_loss: 0.0034
Epoch 3/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0035 - val_loss: 0.0033
Epoch 4/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0031 - val_loss: 0.0032
Epoch 5/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0030 - val_loss: 0.0044
Epoch 6/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0029 - val_loss: 0.0049
Epoch 7/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0028 - val_loss: 0.0035
Epoch 8/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0027 - val_loss: 0.0034
Epoch 9/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [77]:
def get_predictions_multi(model, X, scaler, horizon):
    pred_scaled = model.predict(X)

    preds = []
    for h in range(horizon):
        dummy = np.zeros((len(pred_scaled), scaler.n_features_in_))
        dummy[:, 0] = pred_scaled[:, h]
        preds.append(scaler.inverse_transform(dummy)[:, 0])

    return np.stack(preds, axis=1)  # shape (n_samples, 5)

train_pred = get_predictions_multi(model, X_train, scaler, HORIZON)
test_pred = get_predictions_multi(model, X_test, scaler, HORIZON)


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 


In [78]:
def inverse_y_multi(y, scaler):
    reals = []
    for h in range(y.shape[1]):
        dummy = np.zeros((len(y), scaler.n_features_in_))
        dummy[:, h*0] = y[:, h]
        reals.append(scaler.inverse_transform(dummy)[:, 0])
    return np.stack(reals, axis=1)

y_test_real = inverse_y_multi(y_test, scaler)


In [79]:
def pip_error_by_horizon(
    y_real,
    y_pred,
    pip_value
):
    stats = {}
    for h in range(y_real.shape[1]):
        ae_pips = np.abs(y_real[:, h] - y_pred[:, h]) / pip_value
        stats[f"J+{h+1}_MAE_pips"] = ae_pips.mean()
        stats[f"J+{h+1}_P90_pips"] = np.quantile(ae_pips, 0.9)
    return stats


In [80]:
pip_stats = pip_error_by_horizon(
    y_test_real,
    test_pred,
    pip_value
)

for k, v in pip_stats.items():
    print(f"{k}: {v:.2f}")


J+1_MAE_pips: 61.06
J+1_P90_pips: 128.49
J+2_MAE_pips: 71.80
J+2_P90_pips: 145.81
J+3_MAE_pips: 83.52
J+3_P90_pips: 172.78
J+4_MAE_pips: 92.96
J+4_P90_pips: 189.70
J+5_MAE_pips: 100.55
J+5_P90_pips: 209.56


In [81]:
def plot_prices(
    df,
    y_train_real,
    y_test_real,
    train_pred,
    test_pred,
    window_size,
    horizon_idx=0,
    title=None
):
    """
    horizon_idx = 0 -> J+1
                  1 -> J+2
                  ...
                  4 -> J+5
    """

    if title is None:
        title = f"Multi-step LSTM – Prediction J+{horizon_idx+1}"

    plot_df = pd.DataFrame(index=df.index)

    plot_df["Real Price"] = df["close"].astype(float)
    plot_df["Train Prediction"] = np.nan
    plot_df["Test Prediction"] = np.nan

    # 🔹 positions temporelles
    train_len = len(train_pred)
    test_len = len(test_pred)

    # Train predictions (J+h)
    plot_df.iloc[
        window_size + horizon_idx :
        window_size + horizon_idx + train_len,
        plot_df.columns.get_loc("Train Prediction")
    ] = train_pred[:, horizon_idx]

    # Test predictions (J+h)
    plot_df.iloc[
        window_size + horizon_idx + train_len :
        window_size + horizon_idx + train_len + test_len,
        plot_df.columns.get_loc("Test Prediction")
    ] = test_pred[:, horizon_idx]

    fig = px.line(
        plot_df,
        x=plot_df.index,
        y=plot_df.columns,
        title=title,
        labels={"value": "Price", "index": "Date"}
    )

    fig.update_layout(
        hovermode="x unified",
        template="plotly_white"
    )

    fig.show()

In [82]:
plot_loss(history)

In [83]:
plot_prices(
    df=df,
    y_train_real=y_train_real,
    y_test_real=y_test_real,
    train_pred=train_pred,
    test_pred=test_pred,
    window_size=WINDOW_SIZE,
    horizon_idx=2   # J+3
)

plot_prices(
    df=df,
    y_train_real=y_train_real,
    y_test_real=y_test_real,
    train_pred=train_pred,
    test_pred=test_pred,
    window_size=WINDOW_SIZE,
    horizon_idx=4   # J+5
)


In [89]:
def build_direction_df_j5(
    df,
    y_test_real,
    test_pred,
    window_size,
    train_len,
    horizon_idx=4,   # J+5
    pip_value=None
):
    """
    horizon_idx = 4 -> J+5
    """

    start = window_size + train_len
    idx = df.index[start:start + len(y_test_real)]

    ref_price = df["close"].loc[idx].values

    real_j5 = y_test_real[:, horizon_idx]
    pred_j5 = test_pred[:, horizon_idx]

    out = pd.DataFrame({
        "time": idx,
        "ref_price": ref_price,
        "real_j5": real_j5,
        "pred_j5": pred_j5
    }).set_index("time")

    # variations à 5 jours
    out["real_delta"] = out["real_j5"] - out["ref_price"]
    out["pred_delta"] = out["pred_j5"] - out["ref_price"]

    # directions
    out["real_dir"] = np.sign(out["real_delta"])
    out["pred_dir"] = np.sign(out["pred_delta"])

    # direction correcte ?
    out["direction_ok"] = (out["real_dir"] == out["pred_dir"]).astype(int)

    # amplitude (optionnel, mais très utile)
    if pip_value is not None:
        out["real_move_pips"] = out["real_delta"] / pip_value
        out["pred_move_pips"] = out["pred_delta"] / pip_value

    return out

def direction_metrics_j5(dir_df):
    metrics = {}

    # accuracy directionnelle brute
    metrics["Directional_accuracy_%"] = float(dir_df["direction_ok"].mean() * 100)

    # conditionnelles
    up_mask = dir_df["pred_dir"] > 0
    down_mask = dir_df["pred_dir"] < 0

    metrics["Accuracy_when_predict_UP_%"] = float(
        dir_df.loc[up_mask, "direction_ok"].mean() * 100
    ) if up_mask.any() else np.nan

    metrics["Accuracy_when_predict_DOWN_%"] = float(
        dir_df.loc[down_mask, "direction_ok"].mean() * 100
    ) if down_mask.any() else np.nan

    # biais directionnel
    metrics["Predicted_UP_%"] = float((dir_df["pred_dir"] > 0).mean() * 100)
    metrics["Predicted_DOWN_%"] = float((dir_df["pred_dir"] < 0).mean() * 100)

    # amplitude médiane (si présente)
    if "real_move_pips" in dir_df.columns:
        metrics["Median_real_move_pips"] = float(dir_df["real_move_pips"].abs().median())
        metrics["Median_pred_move_pips"] = float(dir_df["pred_move_pips"].abs().median())

    return metrics

def plot_direction_hits(dir_df, title="J+5 Directional accuracy over time"):
    fig = px.scatter(
        dir_df,
        x=dir_df.index,
        y="real_delta",
        color=dir_df["direction_ok"].map({1: "Correct", 0: "Wrong"}),
        title=title,
        labels={"real_delta": "Real 5-day move"}
    )
    fig.update_layout(template="plotly_white")
    fig.show()

def plot_direction_confusion(dir_df):
    conf = pd.crosstab(
        dir_df["real_dir"],
        dir_df["pred_dir"],
        normalize="index"
    )

    # mapping dynamique des labels
    label_map = {
        -1: "DOWN",
         0: "FLAT",
         1: "UP"
    }

    conf.index = [f"Real {label_map.get(i, i)}" for i in conf.index]
    conf.columns = [f"Pred {label_map.get(i, i)}" for i in conf.columns]

    fig = px.imshow(
        conf.values,
        x=conf.columns,
        y=conf.index,
        text_auto=".2f",
        title="J+5 Direction confusion matrix",
        aspect="auto"
    )

    fig.update_layout(template="plotly_white")
    fig.show()


In [91]:
def direction_metrics_j5_filtered(dir_df):
    metrics = {}

    # trades uniquement (LONG ou SHORT)
    traded = dir_df["pred_dir_filtered"] != 0
    n_total = len(dir_df)
    n_trades = traded.sum()

    metrics["Trade_frequency_%"] = float(n_trades / n_total * 100)

    if n_trades > 0:
        metrics["Directional_accuracy_on_trades_%"] = float(
            (dir_df.loc[traded, "real_dir"] == dir_df.loc[traded, "pred_dir_filtered"]).mean() * 100
        )
    else:
        metrics["Directional_accuracy_on_trades_%"] = np.nan

    # accuracy globale (flat = ignoré)
    metrics["Coverage_%"] = metrics["Trade_frequency_%"]

    # biais directionnel
    metrics["Predicted_UP_%"] = float(
        (dir_df["pred_dir_filtered"] > 0).mean() * 100
    )
    metrics["Predicted_DOWN_%"] = float(
        (dir_df["pred_dir_filtered"] < 0).mean() * 100
    )
    metrics["Predicted_FLAT_%"] = float(
        (dir_df["pred_dir_filtered"] == 0).mean() * 100
    )

    return metrics


def plot_direction_confusion_filtered(dir_df):
    conf = pd.crosstab(
        dir_df["real_dir"],
        dir_df["pred_dir_filtered"],
        normalize="index"
    )

    label_map = {-1: "DOWN", 0: "FLAT", 1: "UP"}
    conf.index = [f"Real {label_map.get(i, i)}" for i in conf.index]
    conf.columns = [f"Pred {label_map.get(i, i)}" for i in conf.columns]

    fig = px.imshow(
        conf.values,
        x=conf.columns,
        y=conf.index,
        text_auto=".2f",
        title="J+5 Direction confusion (filtered)",
        aspect="auto"
    )
    fig.update_layout(template="plotly_white")
    fig.show()


In [None]:
dir_df = build_direction_df_j5(
    df=df,
    y_test_real=y_test_real,
    test_pred=test_pred,
    window_size=WINDOW_SIZE,
    train_len=len(train_pred),
    horizon_idx=4,   # J+5
    pip_value=pip_value
)

metrics = direction_metrics_j5(dir_df)

print("📊 J+5 Directional characterization:")
for k, v in metrics.items():
    print(f"{k}: {v:.2f}")

plot_direction_hits(dir_df)
plot_direction_confusion(dir_df)


📊 J+5 Directional characterization:
Directional_accuracy_%: 52.63
Accuracy_when_predict_UP_%: 63.64
Accuracy_when_predict_DOWN_%: 42.06
Predicted_UP_%: 48.99
Predicted_DOWN_%: 51.01
Median_real_move_pips: 63.70
Median_pred_move_pips: 46.08
