In [1]:
import pandas as pd
import joblib
from backtesting import Backtest, Strategy
import pytz # We'll need this for our dynamic session times
from datetime import datetime, time



### **Step 2: Building the Strategy's Brain**
This is the most important part. We will define a class that contains our trading logic.

In [None]:
# --- Step 2: Define the Trading Strategy ---
print("Defining the ML-based trading strategy...")

class MLStrategy(Strategy):
    """Trade once per day at the London open, guided by a pre-trained classifier.

    ``init`` loads the model and the daily feature table from disk.
    ``next`` runs on every candle and acts only on the candle whose timestamp
    equals 08:00 London local time (converted to UTC) for that calendar day.
    """

    # --- The init() method is called once at the start ---
    def init(self):
        """Load the model, the feature table and the London timezone."""
        print("Initializing strategy...")
        # 1. Load the pre-trained champion model
        model_path = '../models/xgb_classifier_hyp_a_xauusd_h1_2018_present.joblib'
        self.model = joblib.load(model_path)
        print("Model loaded successfully.")

        # 2. Load the feature data that corresponds to the model
        features_path = '../data/processed/hyp_a_features_xauusd_h1_2018_present.parquet'
        self.features = pd.read_parquet(features_path)
        print("Feature data loaded successfully.")

        # 3. Store the London timezone so the open time follows DST changes
        self.london_tz = pytz.timezone('Europe/London')

    # --- The next() method is called for each new candle ---
    def next(self):
        """Open a long (prediction 1) or short (prediction 0) at the London open.

        Does nothing on any other candle, or when the feature table has no
        row for the current date.
        """
        # self.data.index[-1] gives us the timestamp of the current candle
        current_time_utc = self.data.index[-1]

        # --- Determine the London open time for THIS specific day ---
        current_date = current_time_utc.date()
        london_open_local = self.london_tz.localize(datetime.combine(current_date, time(8, 0)))
        london_open_utc = london_open_local.astimezone(pytz.utc)

        # We only want to make one decision per day, exactly at the London open.
        if current_time_utc != london_open_utc:
            return

        # Look the day up with a pandas Timestamp: a plain datetime.date is not
        # matched by the feature table's date index, so the membership test
        # would be False for every day and the strategy would never trade.
        feature_key = pd.Timestamp(current_date)
        if feature_key not in self.features.index:
            return  # If no features, do nothing

        # 1. Get today's features: select the row for the current date and
        #    drop the target columns. errors='ignore' keeps this working when
        #    the optional 'timeframe' / 'symbol' columns are not in the table
        #    (a strict drop raises KeyError for missing labels).
        today_features = self.features.loc[[feature_key]].drop(
            columns=['london_direction', 'london_return', 'timeframe', 'symbol'],
            errors='ignore',
        )

        # 2. Use the model to make a prediction ([0] takes the single value)
        prediction = self.model.predict(today_features)[0]

        # 3. Execute the trade based on the prediction.
        #    Any existing position is closed first so only one is held at a time.
        if prediction == 1:  # Model predicts Bullish
            self.position.close()  # Close any short position from a previous day
            self.buy()  # Open a new long position
        elif prediction == 0:  # Model predicts Bearish
            self.position.close()  # Close any long position
            self.sell()  # Open a new short position

Defining the ML-based trading strategy...


In [3]:
# Read the processed feature table from disk and show it as the cell output.
features_path = '../data/processed/hyp_a_features_xauusd_h1_2018_present.parquet'
features = pd.read_parquet(path=features_path)
features

Unnamed: 0_level_0,day_of_week,asia_return,asia_range,atr_at_asia_close,rsi_at_asia_close,ema50_dist,ema200_dist,london_direction,london_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-15,0,0.005629,8.68,2.664710,76.767280,0.010549,0.018260,0,-0.002151
2018-01-16,1,0.001135,4.11,2.135934,56.859165,0.003920,0.013381,0,-0.006220
2018-01-17,2,-0.003264,10.02,2.947149,41.565265,-0.001740,0.006212,1,0.000997
2018-01-18,3,-0.000324,6.27,3.006143,42.778712,-0.003702,0.000870,1,0.001731
2018-01-19,4,0.002190,6.97,2.545249,55.094778,0.000468,0.002878,1,0.001953
...,...,...,...,...,...,...,...,...,...
2025-09-18,3,0.000822,25.40,11.136647,39.790183,-0.004722,0.005407,1,0.001320
2025-09-19,4,0.004817,27.93,9.654557,55.276912,0.000320,0.005949,0,-0.001251
2025-09-22,0,0.001815,13.78,8.396011,66.165683,0.006438,0.013073,1,0.007079
2025-09-23,1,-0.000755,22.33,8.872938,64.019476,0.009458,0.022315,1,0.010095


In [4]:
# --- Step 2: ML-driven strategy, revised definition ---
print("Defining the ML-based trading strategy...")

class MLStrategy(Strategy):
    """Place at most one model-driven trade per calendar day at the London open hour."""

    def init(self):
        """Load the classifier and feature table, and reset the per-day trade marker."""
        print("Initializing strategy...")
        self.model = joblib.load('../models/xgb_classifier_hyp_a_xauusd_h1_2018_present.joblib')
        print("Model loaded successfully.")

        self.features = pd.read_parquet('../data/processed/hyp_a_features_xauusd_h1_2018_present.parquet')
        print("Feature data loaded successfully.")

        # Date of the most recent day a signal was acted on (None until the first trade)
        self._last_trade_date = None
        self.london_tz = pytz.timezone('Europe/London')

    def next(self):
        """Act on the first candle of the day whose UTC hour matches 08:00 London time."""
        bar_time = self.data.index[-1]
        current_date = bar_time.date()

        # Already handled today: nothing more to do
        if self._last_trade_date == current_date:
            return

        # 08:00 London local time expressed as a UTC hour for this specific date
        open_local = self.london_tz.localize(datetime.combine(current_date, time(8, 0)))
        open_hour_utc = open_local.astimezone(pytz.utc).hour
        if bar_time.hour != open_hour_utc:
            return

        # The feature index is queried with a Timestamp rather than a datetime.date
        feature_key = pd.to_datetime(current_date)
        if feature_key not in self.features.index:
            print(f"Skipping trade on {current_date}: No features found.")
            return

        # Mark the day as used before predicting, so later candles today are skipped
        self._last_trade_date = current_date

        day_row = self.features.loc[[feature_key]]
        model_input = day_row.drop(columns=['london_direction', 'london_return'])

        prediction = self.model.predict(model_input)[0]
        print(f"TRADE SIGNAL on {current_date}: Prediction is {'BULLISH' if prediction == 1 else 'BEARISH'}")

        # 1 -> go long, 0 -> go short; any other value leaves the position untouched
        if prediction == 1:
            self.position.close()
            self.buy()
        elif prediction == 0:
            self.position.close()
            self.sell()

Defining the ML-based trading strategy...


### **Step 3: Preparing the Data and Running the Backtest**
Now that our "brain" is defined, we need to load the historical price data, connect it to our strategy, and press "Go".


In [5]:
# --- Step 3: Load Data and Run the Backtest ---
print("\nPreparing data for backtest...")

# 1. Read the raw hourly candles and shape them for backtesting.py in one pass:
#    index on the 'time' column, mark the index as UTC, and rename the raw
#    columns to the capitalised names the library requires
#    ('tick_volume' becomes 'Volume').
price_data = (
    pd.read_parquet('../data/raw/xauusd_h1_2018_present.parquet')
    .set_index('time')
    .tz_localize('UTC')
    .rename(columns={
        'open': 'Open',
        'high': 'High',
        'low': 'Low',
        'close': 'Close',
        'tick_volume': 'Volume',
    })
)
print("Price data columns renamed to match backtesting.py requirements.")

# 2. Keep only the final 20% of rows so the simulation runs on the hold-out period.
# NOTE(review): this cut is taken on hourly price rows; confirm it lines up with
# the train/test boundary used when the model was fitted, otherwise the backtest
# may include days the model has already seen.
train_size_raw = int(0.8 * len(price_data))
backtest_data = price_data.iloc[train_size_raw:]
print(f"Backtesting on data from {backtest_data.index[0]} to {backtest_data.index[-1]}")

# 3. Build the backtest engine: $10,000 starting cash, 0.02% commission per trade
#    (stands in for broker fees/spread), and exclusive orders so only one
#    position is open at a time.
bt = Backtest(
    backtest_data,
    MLStrategy,
    cash=10_000,
    commission=0.0002,
    exclusive_orders=True,
)

# 4. Run the simulation
print("\nRunning backtest...")
stats = bt.run()
print("Backtest complete.")

# 5. Report the summary statistics, then draw the equity curve
print("\n--- Backtest Results ---")
print(stats)

print("\nGenerating equity curve plot...")
bt.plot()


Preparing data for backtest...
Price data columns renamed to match backtesting.py requirements.
Backtesting on data from 2024-03-08 22:00:00+00:00 to 2025-09-24 08:00:00+00:00

Running backtest...
Initializing strategy...
Model loaded successfully.
Feature data loaded successfully.


Backtest.run:   0%|          | 0/9119 [00:00<?, ?bar/s]

TRADE SIGNAL on 2024-03-11: Prediction is BULLISH
TRADE SIGNAL on 2024-03-12: Prediction is BULLISH
TRADE SIGNAL on 2024-03-13: Prediction is BULLISH
TRADE SIGNAL on 2024-03-14: Prediction is BULLISH
TRADE SIGNAL on 2024-03-15: Prediction is BULLISH
TRADE SIGNAL on 2024-03-18: Prediction is BEARISH
TRADE SIGNAL on 2024-03-19: Prediction is BEARISH
TRADE SIGNAL on 2024-03-20: Prediction is BEARISH
TRADE SIGNAL on 2024-03-21: Prediction is BEARISH
TRADE SIGNAL on 2024-03-22: Prediction is BEARISH
TRADE SIGNAL on 2024-03-25: Prediction is BULLISH
TRADE SIGNAL on 2024-03-26: Prediction is BULLISH
TRADE SIGNAL on 2024-03-27: Prediction is BULLISH
TRADE SIGNAL on 2024-03-28: Prediction is BEARISH
TRADE SIGNAL on 2024-04-01: Prediction is BULLISH
TRADE SIGNAL on 2024-04-02: Prediction is BULLISH
TRADE SIGNAL on 2024-04-03: Prediction is BULLISH
TRADE SIGNAL on 2024-04-04: Prediction is BULLISH
TRADE SIGNAL on 2024-04-05: Prediction is BULLISH
TRADE SIGNAL on 2024-04-08: Prediction is BEARISH


  stats = bt.run()


Backtest complete.

--- Backtest Results ---
Start                     2024-03-08 22:00...
End                       2025-09-24 08:00...
Duration                    564 days 10:00:00
Exposure Time [%]                    99.87939
Equity Final [$]                  10903.28043
Equity Peak [$]                     11739.208
Commissions [$]                     1371.2523
Return [%]                             9.0328
Buy & Hold Return [%]                73.37905
Return (Ann.) [%]                      5.5993
Volatility (Ann.) [%]                14.63578
CAGR [%]                              3.93659
Sharpe Ratio                          0.38258
Sortino Ratio                         0.55636
Calmar Ratio                          0.40095
Alpha [%]                             9.70395
Beta                                 -0.00915
Max. Drawdown [%]                   -13.96499
Avg. Drawdown [%]                     -1.3056
Max. Drawdown Duration      207 days 16:00:00
Avg. Drawdown Duration       12 day

  return convert(array.astype("datetime64[us]"))
