In [1]:
# --- Setup Project Path ---
import sys
import os

# Make the repository root importable so that `src.*` modules resolve.
# The root is taken to be the parent of the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

# --- General Imports ---
import numpy as np
import pandas as pd
import yaml

# --- Custom Project Imports ---
from src.training_pipeline import TrainingPipeline
from src.backtester import VectorizedBacktester

# --- Load Configuration ---
config_path = os.path.join(project_root, 'configs', 'config.yaml')
print(f"Loading configuration from: {config_path}")
# Read explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale (e.g. a Windows code page), which can corrupt or reject
# non-ASCII characters in the YAML file.
with open(config_path, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)
# safe_load returns None for an empty document; the config is indexed by key
# later on, so fail here with a clear message instead of a late TypeError.
if not isinstance(config, dict):
    raise ValueError(
        f"Expected a YAML mapping in {config_path}, got {type(config).__name__}."
    )
print("Configuration loaded successfully.")

Loading configuration from: C:\Projetos_Python\gld_lstm_strategy\configs\config.yaml
Configuration loaded successfully.


In [2]:
# --- 1. Run Pipeline with LSTM Model ---

# Build the pipeline object from the loaded configuration and project root.
pipeline = TrainingPipeline(config=config, project_root=project_root)

# Three-line banner, emitted with a single call.
print("=" * 50, "RUNNING PIPELINE FOR LSTM MODEL", "=" * 50, sep="\n")

# Static test run for the 'lstm' model type; the returned results are kept
# in `lstm_results`.
lstm_results = pipeline.run_static_test(model_type='lstm')

print("\n\n✅ --- LSTM Pipeline Finished! --- ✅")

Random seeds set to 2025 for reproducibility.
RUNNING PIPELINE FOR LSTM MODEL


===== Starting STATIC Test Run for model_type='lstm' =====
===== Step 1: Loading and Preparing Full Dataset =====
--- Loading Main Asset Data ---
Loading GLD data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\gld_data.csv

--- Loading Macroeconomic Data ---
Loading DX-Y.NYB data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\dx-y.nyb_data.csv
Loading ^TNX data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\^tnx_data.csv
Loading ^VIX data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\^vix_data.csv
Loading CL=F data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\cl=f_data.csv
Loading SI=F data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\si=f_data.csv
Loading TIP data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\tip_data.csv
Loading HG=F data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\hg=f_data.cs

130it [00:28,  4.54it/s]


 -> Dropped 2 redundant TA columns.
Step 3: Creating custom interaction and ratio features...
Step 4: Creating lagged and momentum features...
Step 5: Merging macroeconomic features...
 -> Macro features merged and forward-filled.
Step 6: Defining target variable...

Pipeline complete. Dropped 77 rows with NaN values.
Final dataset shape: (2438, 247)


--- Splitting data chronologically ---
Train set size: 1761, Validation set size: 311, Test set size: 366

--- Running Feature Selection ---
Selected 37 features via BorutaPy.

--- Scaling data ---

--- Preparing data and training LSTM Model ---
Epoch 1/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 199ms/step - accuracy: 0.5331 - loss: 0.6951 - val_accuracy: 0.4180 - val_loss: 0.6958
Epoch 2/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 274ms/step - accuracy: 0.5675 - loss: 0.6833 - val_accuracy: 0.4180 - val_loss: 0.6996
Epoch 3/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16

In [3]:
# --- 2. Run Pipeline with XGBoost Model ---

# The `pipeline` instance already in the kernel is reused here.
# NOTE(review): the output saved with this cell ends in a TypeError from
# XGBClassifier.fit() rejecting 'early_stopping_rounds'. That call is not made
# in this notebook, so the fix presumably belongs in src.training_pipeline
# (likely by configuring early stopping on the estimator rather than passing
# it to fit()) -- confirm against the installed XGBoost version.
print("=" * 50, "RUNNING PIPELINE FOR XGBOOST MODEL", "=" * 50, sep="\n")

# Static test run for the 'xgboost' model type; the returned results are kept
# in `xgboost_results`.
xgboost_results = pipeline.run_static_test(model_type='xgboost')

print("\n\n✅ --- XGBoost Pipeline Finished! --- ✅")

RUNNING PIPELINE FOR XGBOOST MODEL


===== Starting STATIC Test Run for model_type='xgboost' =====
===== Step 1: Loading and Preparing Full Dataset =====
--- Loading Main Asset Data ---
Loading GLD data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\gld_data.csv

--- Loading Macroeconomic Data ---
Loading DX-Y.NYB data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\dx-y.nyb_data.csv
Loading ^TNX data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\^tnx_data.csv
Loading ^VIX data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\^vix_data.csv
Loading CL=F data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\cl=f_data.csv
Loading SI=F data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\si=f_data.csv
Loading TIP data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\tip_data.csv
Loading HG=F data from local cache: C:\Projetos_Python\gld_lstm_strategy\data\hg=f_data.csv

===== Starting Feature Engineering Pi

130it [00:15,  8.13it/s]


 -> Dropped 2 redundant TA columns.
Step 3: Creating custom interaction and ratio features...
Step 4: Creating lagged and momentum features...
Step 5: Merging macroeconomic features...
 -> Macro features merged and forward-filled.
Step 6: Defining target variable...

Pipeline complete. Dropped 77 rows with NaN values.
Final dataset shape: (2438, 247)


--- Splitting data chronologically ---
Train set size: 1761, Validation set size: 311, Test set size: 366

--- Running Feature Selection ---
Selected 37 features via BorutaPy.

--- Scaling data ---

--- Preparing data and training XGBoost Model ---


TypeError: XGBClassifier.fit() got an unexpected keyword argument 'early_stopping_rounds'

In [None]:
# --- 3. Run Backtests for Both Models ---

def run_backtest_for_results(results_dict, config, model_name, *,
                             threshold=0.5, commission=0.001, slippage=0.001):
    """
    Run the vectorized backtest for one model's static-test results.

    Args:
        results_dict: Mapping with at least 'processed_data_for_backtest'
            (the processed DataFrame, target column included) and
            'pred_probas' (predicted probabilities for the test period).
            A column vector of shape (n, 1) is accepted and flattened.
        config: Project configuration. 'TARGET_NAME' and 'TEST_SIZE' are read
            here, plus 'TIME_STEPS' when the model is the LSTM. The whole
            mapping is also forwarded to VectorizedBacktester.
        model_name: Label shown in the banner. The value 'lstm'
            (case-insensitive) enables the TIME_STEPS offset on the dates.
        threshold: A probability strictly above this value becomes signal 1,
            anything else becomes 0.
        commission: Forwarded to VectorizedBacktester.run.
        slippage: Forwarded to VectorizedBacktester.run.

    Returns:
        Whatever VectorizedBacktester.run returns.

    Raises:
        ValueError: If the predictions are not one-dimensional after
            flattening, or their count differs from the number of test dates.
    """
    print("\n" + "="*50)
    print(f"RUNNING BACKTEST FOR {model_name.upper()} MODEL")
    print("="*50)

    # Rebuild the test slice of the feature frame to recover its date index.
    # NOTE(review): this assumes the pipeline used the same chronological
    # cut-off, int(len * (1 - TEST_SIZE)) -- confirm against the pipeline.
    processed_data = results_dict['processed_data_for_backtest']
    X = processed_data.drop(columns=[config['TARGET_NAME']])
    train_val_size = int(len(X) * (1 - config['TEST_SIZE']))
    X_test = X.iloc[train_val_size:]

    # Work on a plain array so values are matched to dates by position, and
    # flatten an (n, 1) column vector, which pd.Series would otherwise reject.
    proba = np.asarray(results_dict['pred_probas'])
    if proba.ndim == 2 and proba.shape[1] == 1:
        proba = proba[:, 0]
    test_predictions = (proba > threshold).astype(int)

    # The LSTM branch skips the first TIME_STEPS test dates; every other
    # model gets one signal per test row.
    if model_name.lower() == 'lstm':
        signal_dates = X_test.index[config['TIME_STEPS']:]
    else:
        signal_dates = X_test.index

    if test_predictions.ndim != 1 or len(test_predictions) != len(signal_dates):
        raise ValueError(
            f"{model_name}: predictions have shape {test_predictions.shape} "
            f"but the test window holds {len(signal_dates)} dates; check "
            "TEST_SIZE / TIME_STEPS against the split used for training."
        )

    signals_series = pd.Series(test_predictions, index=signal_dates, name="signal")

    # Rows of the test frame for the signal dates; presumably these carry the
    # price columns the backtester needs -- confirm.
    price_data_for_backtest = X_test.loc[signal_dates]

    backtester = VectorizedBacktester(
        price_data=price_data_for_backtest,
        signals=signals_series,
        config=config
    )

    return backtester.run(commission=commission, slippage=slippage)

# Backtest each model's static-test results with the same helper.
lstm_portfolio = run_backtest_for_results(
    results_dict=lstm_results, config=config, model_name="LSTM"
)
xgboost_portfolio = run_backtest_for_results(
    results_dict=xgboost_results, config=config, model_name="XGBoost"
)

In [None]:
# --- 4. Final Results Comparison ---

# Extract the statistics from both portfolio objects
lstm_stats = lstm_portfolio.stats()
xgboost_stats = xgboost_portfolio.stats()

# Define the key metrics we want to compare
metrics_to_compare = [
    'Total Return [%]',
    'Benchmark Return [%]',
    'Sharpe Ratio',
    'Sortino Ratio',
    'Max Drawdown [%]',
    'Win Rate [%]',
    'Profit Factor',
    'Total Trades'
]

# One column per model, one row per metric. The values are cast to float
# because a stats Series that also holds non-numeric entries has object dtype
# (presumably the case for these portfolio objects -- confirm), and
# DataFrame.round() leaves object columns untouched, so the rounding below
# would otherwise silently do nothing.
comparison_df = pd.DataFrame({
    'LSTM': lstm_stats[metrics_to_compare],
    'XGBoost': xgboost_stats[metrics_to_compare]
}).astype(float)

print("\n\n" + "="*50)
print("MODEL BENCHMARK COMPARISON")
print("="*50)
display(comparison_df.round(4))

## 5. Benchmark Conclusion

(Escreva sua análise aqui)

Ao analisar a tabela de comparação, podemos tirar as seguintes conclusões:
* **Retorno Total:** O modelo [LSTM/XGBoost] gerou um retorno maior.
* **Retorno Ajustado ao Risco:** O modelo [LSTM/XGBoost] apresentou Sharpe Ratio e Sortino Ratio superiores, indicando uma melhor performance para o risco assumido.
* **Controle de Risco:** O modelo [LSTM/XGBoost] teve um Max Drawdown menor, demonstrando maior capacidade de preservação de capital.
* **Consistência:** O modelo [LSTM/XGBoost] teve uma Taxa de Acerto (Win Rate) maior.

**Veredito:** Com base nesses resultados, o modelo **[LSTM/XGBoost]** parece ser a escolha superior para esta estratégia de trading, pois [explique sua razão, ex: "oferece o melhor balanço entre lucratividade e controle de risco."].