In [1]:
import sys
import os
import warnings
from pathlib import Path
import datetime

import numpy as np
import pandas as pd
import MetaTrader5 as mt5
import joblib
import optuna

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings("ignore")

# 1) PROJECT ROOT
project_root = Path.cwd().parent.parent
sys.path.append(str(project_root))
os.chdir(str(project_root))

# 2) IMPORTS FROM YOUR MODULES
from data.data_loader import get_data_mt5
from features.feature_engineering import add_momentum_indicators
from features.labeling_schemes import create_labels_momentum_classification

###############################################################################
# A) LOAD & PREPARE DATA
###############################################################################
if not mt5.initialize():
    print("Failed to initialize MT5")
else:
    data = get_data_mt5(symbol="BTCUSD", timeframe=mt5.TIMEFRAME_H4, n_bars=3000, start_pos=3000)
    mt5.shutdown()

# Suppose you have a function that adds short/long momentum, MA, Hurst
df = add_momentum_indicators(data)

# Suppose you have a labeling function that sets 'momentum_label' in {0,1,2}
# 0 => Sell, 1 => Hold, 2 => Buy
df = create_labels_momentum_classification(df, horizon=10, threshold=0.0)
df.dropna(inplace=True)

# Choose features
feature_cols = ["mom_10", "mom_30", "ma_90", "hurst"]
X_full = df[feature_cols].values  # shape: (num_samples, num_features)
y_full = df["momentum_label"].values  # shape: (num_samples,)

print("X_full shape:", X_full.shape)
print("Unique y labels:", np.unique(y_full))

# We'll define a lookback for the LSTM
lookback = 10

###############################################################################
# B) CREATE SEQUENCES FOR LSTM CLASSIFICATION
###############################################################################
def create_sequences_classification(X, y, lookback):
    X_seq, y_seq = [], []
    for i in range(len(X) - lookback):
        X_seq.append(X[i : i + lookback])
        y_seq.append(y[i + lookback])
    return np.array(X_seq), np.array(y_seq)

###############################################################################
# C) BUILD LSTM MODEL FOR CLASSIFICATION
###############################################################################
def build_lstm_model_classification(n_units, dropout_rate, learning_rate, lookback, n_features, n_classes=3):
    model = Sequential([
        LSTM(n_units, return_sequences=True, input_shape=(lookback, n_features)),
        Dropout(dropout_rate),
        LSTM(n_units),
        Dense(n_classes, activation='softmax')  # 3 classes: 0,1,2
    ])
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

###############################################################################
# D) OPTUNA OBJECTIVE (With TensorBoard logging)
###############################################################################
def objective(trial):
    n_units = trial.suggest_int("n_units", 32, 128, step=32)
    dropout_rate = trial.suggest_float("dropout_rate", 0.0, 0.5, step=0.1)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    epochs = trial.suggest_int("epochs", 10, 50, step=10)
    batch_size = trial.suggest_int("batch_size", 16, 64, step=16)

    tscv = TimeSeriesSplit(n_splits=3)
    accuracies = []

    for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(X_full)):
        X_train_raw, X_test_raw = X_full[train_idx], X_full[test_idx]
        y_train_raw, y_test_raw = y_full[train_idx], y_full[test_idx]

        # Scale
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train_raw)
        X_test_scaled = scaler.transform(X_test_raw)

        # Create sequences
        X_train_seq, y_train_seq = create_sequences_classification(X_train_scaled, y_train_raw, lookback)
        X_test_seq, y_test_seq = create_sequences_classification(X_test_scaled, y_test_raw, lookback)

        # Build model
        model = build_lstm_model_classification(
            n_units, dropout_rate, learning_rate, 
            lookback=lookback,
            n_features=len(feature_cols),
            n_classes=3
        )

        # TensorBoard callback for each fold
        log_dir = f"logs/optuna/trial_{trial.number}_fold_{fold_idx}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

        # Train
        model.fit(
            X_train_seq, y_train_seq,
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
            callbacks=[tensorboard_callback]
        )

        # Evaluate
        preds = model.predict(X_test_seq, verbose=0)
        pred_classes = np.argmax(preds, axis=1)

        fold_acc = accuracy_score(y_test_seq, pred_classes)
        accuracies.append(fold_acc)

    # We'll do direction="maximize" in create_study if we want to maximize accuracy
    # but if direction="minimize", we can return negative accuracy
    return -np.mean(accuracies)

###############################################################################
# E) RUN OPTUNA STUDY
###############################################################################
study = optuna.create_study(direction="minimize")  # or direction="maximize" + invert in objective
study.optimize(objective, n_trials=10, timeout=1800)

best_params = study.best_trial.params
best_value = study.best_trial.value

print("\nBest Trial => Accuracy:", -best_value)  # because we used negative
print("Params:", best_params)

###############################################################################
# F) VISUALIZE OPTUNA RESULTS
###############################################################################
import optuna.visualization as ov
ov.plot_optimization_history(study).show()
ov.plot_param_importances(study).show()

###############################################################################
# G) RETRAIN ON FULL DATA WITH BEST PARAMS
###############################################################################
n_units = best_params["n_units"]
dropout_rate = best_params["dropout_rate"]
learning_rate = best_params["learning_rate"]
epochs = best_params["epochs"]
batch_size = best_params["batch_size"]

# Scale entire dataset
scaler_final = StandardScaler()
X_full_scaled = scaler_final.fit_transform(X_full)

X_all_seq, y_all_seq = create_sequences_classification(X_full_scaled, y_full, lookback)

final_model = build_lstm_model_classification(
    n_units, dropout_rate, learning_rate,
    lookback=lookback,
    n_features=len(feature_cols),
    n_classes=3
)

final_model.fit(
    X_all_seq, y_all_seq,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1
)

final_model.save("models/saved_models/best_lstm_momentum_classification.h5")
joblib.dump(scaler_final, "models/saved_models/scaler_momentum_classification.pkl")

print("Saved final momentum classification model + scaler!")


[I 2025-03-03 14:40:04,398] A new study created in memory with name: no-name-cc179a91-a63f-47d4-882c-453c9329a6df


X_full shape: (2900, 4)
Unique y labels: [0 2]


[I 2025-03-03 14:42:20,519] Trial 0 finished with value: -0.5300699300699301 and parameters: {'n_units': 32, 'dropout_rate': 0.1, 'learning_rate': 0.00011602366876197863, 'epochs': 30, 'batch_size': 16}. Best is trial 0 with value: -0.5300699300699301.
[I 2025-03-03 14:43:40,011] Trial 1 finished with value: -0.5016317016317017 and parameters: {'n_units': 96, 'dropout_rate': 0.30000000000000004, 'learning_rate': 0.0033250450966077795, 'epochs': 20, 'batch_size': 64}. Best is trial 0 with value: -0.5300699300699301.
[I 2025-03-03 14:45:39,973] Trial 2 finished with value: -0.48205128205128206 and parameters: {'n_units': 32, 'dropout_rate': 0.1, 'learning_rate': 0.0005971822072690268, 'epochs': 50, 'batch_size': 64}. Best is trial 0 with value: -0.5300699300699301.
[I 2025-03-03 14:46:43,028] Trial 3 finished with value: -0.48531468531468525 and parameters: {'n_units': 32, 'dropout_rate': 0.0, 'learning_rate': 0.002735665026261543, 'epochs': 20, 'batch_size': 48}. Best is trial 0 with va


Best Trial => Accuracy: 0.5300699300699301
Params: {'n_units': 32, 'dropout_rate': 0.1, 'learning_rate': 0.00011602366876197863, 'epochs': 30, 'batch_size': 16}


Epoch 1/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.3219 - loss: 1.0889
Epoch 2/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.5326 - loss: 0.9527
Epoch 3/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.5517 - loss: 0.7641
Epoch 4/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5736 - loss: 0.6994
Epoch 5/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.5621 - loss: 0.6922
Epoch 6/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.5588 - loss: 0.6865
Epoch 7/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.5771 - loss: 0.6800
Epoch 8/30
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5904 - loss: 0.6815
Epoch 9/30
[1m181/181[0m [32m━



Saved final momentum classification model + scaler!


In [1]:
import sys
import os
import warnings
from pathlib import Path

# ---------------------------------------------------------------------------
# 1) SET PROJECT ROOT AND UPDATE PATH/WORKING DIRECTORY
# ---------------------------------------------------------------------------
project_root = Path.cwd().parent.parent
sys.path.append(str(project_root))
os.chdir(str(project_root))
warnings.filterwarnings("ignore")
import joblib
import numpy as np
import pandas as pd
import MetaTrader5 as mt5
import vectorbt as vbt
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.optimizers import Adam

# Our modules
from data.data_loader import get_data_mt5
from features.feature_engineering import add_momentum_indicators
from features.labeling_schemes import create_labels_momentum_classification

###########################################################
# 1) DATA LOADING & FEATURE ENGINEERING
###########################################################
if not mt5.initialize():
    print("Failed to initialize MT5")
else:
    # Fetch 2000 most recent bars for backtesting
    data = get_data_mt5(symbol="BTCUSD", timeframe=mt5.TIMEFRAME_H4, n_bars=2000, start_pos=0)
    mt5.shutdown()

# 1) Add momentum indicators (e.g., short/long momentum, MA, Hurst)
df = add_momentum_indicators(data)

# 2) Create multi-class momentum labels (0=Sell, 1=Hold, 2=Buy)
df = create_labels_momentum_classification(df, horizon=10, threshold=0.0)
df.dropna(inplace=True)
df.sort_index(inplace=True)  # Ensure chronological order

# Choose the features used in training
feature_cols = ["mom_10", "mom_30", "ma_90", "hurst"]
X = df[feature_cols].values
y = df["momentum_label"].values  # 0 => Sell, 1 => Hold, 2 => Buy

print(f"Full Dataset Size: {len(X)} bars")
print("Features used:", feature_cols)
print("Unique momentum classes:", np.unique(y))

###########################################################
# 2) LOAD BEST LSTM MOMENTUM MODEL & SCALER
###########################################################
model_path = "models/saved_models/best_lstm_momentum_classification.h5"
best_model = load_model(model_path)
# For classification, we compile with 'sparse_categorical_crossentropy'
best_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# Load the saved scaler from training
scaler_path = "models/saved_models/scaler_momentum_classification.pkl"
if os.path.exists(scaler_path):
    scaler = joblib.load(scaler_path)
    print(f"Loaded scaler from {scaler_path}")
else:
    print("No saved scaler found. Fitting a new one on the entire dataset (not recommended for live usage).")
    scaler = StandardScaler()
    scaler.fit(X)

# Scale the entire dataset
X_scaled = scaler.transform(X)

# We'll use the same lookback as in training
lookback = 10
fees = 0.0002  # e.g., 0.02% transaction cost

###########################################################
# 3) CREATE SEQUENCES FOR CLASSIFICATION
###########################################################
def create_sequences_classification(X_data, y_data, lookback):
    X_seq, y_seq = [], []
    for i in range(len(X_data) - lookback):
        X_seq.append(X_data[i : i + lookback])
        y_seq.append(y_data[i + lookback])
    return np.array(X_seq), np.array(y_seq)

X_seq, y_seq = create_sequences_classification(X_scaled, y, lookback)
print(f"Total Sequences Created: {len(X_seq)}")

###########################################################
# 4) PREDICT CLASSES & CONVERT TO TRADING SIGNALS
###########################################################
print("\nPredicting momentum classes on the entire dataset (No retraining)...")
pred_probs = best_model.predict(X_seq)
pred_classes = np.argmax(pred_probs, axis=1)  # 0 => Sell, 1 => Hold, 2 => Buy

# (Optional) Evaluate final accuracy
acc = accuracy_score(y_seq, pred_classes)
print(f"Classification Accuracy (entire dataset after lookback): {acc:.4f}")

# Convert classes to signals:
# 0 => Sell => -1
# 1 => Hold => 0
# 2 => Buy  => +1
class_to_signal = {0: -1, 1: 0, 2: 1}
signals = np.array([class_to_signal[c] for c in pred_classes])

# Align signals with dataset index (after lookback)
df_test = df.iloc[lookback:].copy()
close_prices = df_test["close"]

if len(signals) < len(close_prices):
    pad_len = len(close_prices) - len(signals)
    signals = np.append(signals, [0]*pad_len)

signals_s = pd.Series(signals, index=close_prices.index)

###########################################################
# 5) RUN FULL BACKTEST USING VECTORBT
###########################################################
print("\nRunning Full Backtest with Momentum Classification LSTM...")

pf = vbt.Portfolio.from_signals(
    close_prices,
    entries=signals_s > 0,  # class 2 => buy
    exits=signals_s < 0,    # class 0 => sell
    init_cash=10000,
    freq='4H',
    fees=fees
)

total_return = pf.total_return()
sharpe_ratio = pf.sharpe_ratio()

# Print final results
print("\nFull Backtest Results:")
print(f"Accuracy={acc:.2f}, Return={total_return:.2f}%, Sharpe={sharpe_ratio:.2f}")
print(pf.stats())

# Plot the backtest results
fig = pf.plot()
fig.show()




Full Dataset Size: 1900 bars
Features used: ['mom_10', 'mom_30', 'ma_90', 'hurst']
Unique momentum classes: [0 2]
Loaded scaler from models/saved_models/scaler_momentum_classification.pkl
Total Sequences Created: 1890

Predicting momentum classes on the entire dataset (No retraining)...
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Classification Accuracy (entire dataset after lookback): 0.4937

Running Full Backtest with Momentum Classification LSTM...

Full Backtest Results:
Accuracy=0.49, Return=0.21%, Sharpe=0.78
Start                               2024-04-21 04:00:00
End                                 2025-03-02 00:00:00
Period                                315 days 00:00:00
Start Value                                     10000.0
End Value                                   12138.43894
Total Return [%]                              21.384389
Benchmark Return [%]                          32.581465
Max Gross Exposure [%]                            100.0
To