In [1]:
# Import backtesting.py library
from backtesting import Backtest, Strategy
from backtesting.lib import crossover, resample_apply

# Import indicators thats not provided by backtesting.py
import talib

# Import pandas and numpy
import pandas as pd
import numpy as np

# import json and joblib for loading model
import joblib
import json

# remove warnings that can be ignored
import warnings
warnings.simplefilter("ignore")

# I. Load dataset

In [2]:
# read data from csv
goog_df = pd.read_csv('EURUSD_H4.csv')

In [3]:
# Show dataset heads and tails
goog_df

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2006-12-31 20:00,1.31958,1.31985,1.31887,1.31903,185311
1,2007-01-01 00:00,1.31917,1.31936,1.31655,1.31688,173882
2,2007-01-01 04:00,1.31689,1.31943,1.31650,1.31912,176756
3,2007-01-01 08:00,1.31933,1.32095,1.31863,1.31914,916275
4,2007-01-01 12:00,1.31942,1.32080,1.31837,1.32058,348929
...,...,...,...,...,...,...
25583,2022-11-08 00:00,0.99991,1.00054,0.99715,0.99945,115058
25584,2022-11-08 04:00,0.99944,1.00124,0.99851,1.00112,111555
25585,2022-11-08 08:00,1.00110,1.00963,1.00105,1.00672,139008
25586,2022-11-08 12:00,1.00671,1.00827,1.00539,1.00732,64625


In [4]:
# convert date type from string to datetime
goog_df['Date'] = pd.to_datetime(goog_df['Date'])

In [5]:
# take data after and on 2022 for backtest the model
goog_df = goog_df[(goog_df['Date'].apply(lambda x: x.year >= 2022))]

In [6]:
# reset index
goog_df.set_index('Date', inplace=True)

## Loading Model

In [7]:
# Model loading
with open('model.pkl', 'rb') as file_1:
  model = joblib.load(file_1)

with open('model_scaler.pkl', 'rb') as file_2:
  scaler = joblib.load(file_2)

with open('model_encoder.pkl', 'rb') as file_3:
  encoder = joblib.load(file_3)

with open('list_cat.txt', 'r') as file_5:
  cat_columns = json.load(file_5)

with open('list_num.txt', 'r') as file_4:
  num_columns = json.load(file_4)

# II. Backtesting

In [8]:
# backtesting with machine learning
class RsiOscillator(Strategy):

    # Define upper and lower bound on rsi
    upper_bound = 70
    lower_bound = 30
    rsi_window = 14
    
    # Initialize parameters
    with open('list_cat.txt', 'r') as file_5:
        cat_columns = json.load(file_5)

    with open('list_num.txt', 'r') as file_4:
        num_columns = json.load(file_4) 
    
    def init(self):

        self.daily_rsi = resample_apply('4H', talib.RSI, self.data.Close, self.rsi_window)

        self.weekly_rsi = resample_apply(
            'W-FRI', talib.RSI, self.data.Close, self.rsi_window
        )

    def prob(x):
        if x <= 0.28:
            return 'win'
        else:
            return 'loss'

    # backtesting function for every price changes
    def next(self):
        price = self.data.Close[-1]

        if (crossover(self.daily_rsi, self.upper_bound)):
            if self.position.is_long:
                self.position.close()

        elif (crossover(self.lower_bound, self.daily_rsi)):
            if self.position.is_short:
                self.position.close()

        if (crossover(self.daily_rsi, 51)):

            data_inf = {
                'EntryPrice': price,
                'long_short': 'long',
                'entry_month': self.data.index[-1].month,
                'entry_date': self.data.index[-1].day,
                'entry_day': self.data.index[-1].weekday(),
                'entry_hour': self.data.index[-1].hour
            }

            data_inf = pd.DataFrame([data_inf])
            inf_scaled = scaler.transform(data_inf[num_columns])
            inf_encoded = encoder.transform(data_inf[cat_columns])

            X_inf = np.concatenate((inf_encoded, inf_scaled), axis=1 )

            inf_pred = model.predict_proba(X_inf) # Use probabilyt instead to raise the precision

            if inf_pred[0][1] >= 0.78: # probability of prediction with classification 'win' in this case its 78%
                self.buy(size=0.25, sl=0.95*price)
            
            else:
                pass


        elif (crossover(49, self.daily_rsi)):
        
            data_inf = {
                'EntryPrice': price,
                'long_short': 'long',
                'entry_month': self.data.index[-1].month,
                'entry_date': self.data.index[-1].day,
                'entry_day': self.data.index[-1].weekday(),
                'entry_hour': self.data.index[-1].hour
            }
            
            data_inf = pd.DataFrame([data_inf])
            inf_scaled = scaler.transform(data_inf[num_columns])
            inf_encoded = encoder.transform(data_inf[cat_columns])

            X_inf = np.concatenate((inf_encoded, inf_scaled), axis=1 )

            inf_pred = model.predict_proba(X_inf) # Use probabilyt instead to raise the precision

            if inf_pred[0][1] >= 0.78: # probability of prediction with classification 'win' in this case its 78%
                self.sell(size=0.25, sl=1.05*price)
            
            else:
                pass

In [9]:
# Define Backtesting functions
bt = Backtest(goog_df, RsiOscillator, cash = 10000)

In [10]:
# Start backtesting
stats = bt.optimize(
    upper_bound = 70,
    lower_bound = 30,
    rsi_window = 14
)
stats

Start                     2022-01-02 12:00:00
End                       2022-11-08 16:00:00
Duration                    310 days 04:00:00
Exposure Time [%]                   16.473149
Equity Final [$]                  10045.61731
Equity Peak [$]                   10080.30794
Return [%]                           0.456173
Buy & Hold Return [%]              -11.519489
Return (Ann.) [%]                     0.43049
Volatility (Ann.) [%]                0.782096
Sharpe Ratio                         0.550432
Sortino Ratio                         0.96972
Calmar Ratio                         0.791883
Max. Drawdown [%]                   -0.543629
Avg. Drawdown [%]                   -0.294579
Max. Drawdown Duration      131 days 16:00:00
Avg. Drawdown Duration       31 days 04:00:00
# Trades                                   11
Win Rate [%]                        63.636364
Best Trade [%]                       1.449234
Worst Trade [%]                     -0.772036
Avg. Trade [%]                    

In [11]:
# show best parameters after running on optimizer
stats['_strategy']

<Strategy RsiOscillator(upper_bound=70,lower_bound=30,rsi_window=14)>

# III. Data Explorations

In [12]:
# shows trading results sorteed by entrytime
stats['_trades'].sort_values('EntryTime')

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,-1768,551,556,1.05161,1.05814,-11.54504,-0.00621,2022-05-05 12:00:00,2022-05-06 08:00:00,0 days 20:00:00
1,-609,551,565,1.05161,1.05565,-2.46036,-0.003842,2022-05-05 12:00:00,2022-05-09 16:00:00,4 days 04:00:00
2,1484,565,608,1.05565,1.0475,-12.0946,-0.00772,2022-05-09 16:00:00,2022-05-18 16:00:00,9 days 00:00:00
5,119,565,625,1.05565,1.06683,1.33042,0.010591,2022-05-09 16:00:00,2022-05-23 08:00:00,13 days 16:00:00
4,1971,599,625,1.05159,1.06683,30.03804,0.014492,2022-05-17 04:00:00,2022-05-23 08:00:00,6 days 04:00:00
3,1835,612,625,1.05907,1.06683,14.2396,0.007327,2022-05-19 08:00:00,2022-05-23 08:00:00,4 days 00:00:00
8,-2387,759,798,1.04914,1.04515,9.52413,0.003803,2022-06-22 00:00:00,2022-06-30 08:00:00,8 days 08:00:00
7,-1781,767,798,1.05286,1.04515,13.73151,0.007323,2022-06-23 08:00:00,2022-06-30 08:00:00,7 days 00:00:00
6,-1336,787,798,1.0522,1.04515,9.4188,0.0067,2022-06-28 12:00:00,2022-06-30 08:00:00,1 days 20:00:00
9,-1839,910,918,1.0134,1.02021,-12.52359,-0.00672,2022-07-26 08:00:00,2022-07-27 16:00:00,1 days 08:00:00


In [13]:
# Show the results on a plot(limited to only for around 2000 bars?)
# bt.plot(resample='4H')

In [14]:
# Define results trade data to new variable
trades_data = stats['_trades'].copy()
trades_data.head()

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,-1768,551,556,1.05161,1.05814,-11.54504,-0.00621,2022-05-05 12:00:00,2022-05-06 08:00:00,0 days 20:00:00
1,-609,551,565,1.05161,1.05565,-2.46036,-0.003842,2022-05-05 12:00:00,2022-05-09 16:00:00,4 days 04:00:00
2,1484,565,608,1.05565,1.0475,-12.0946,-0.00772,2022-05-09 16:00:00,2022-05-18 16:00:00,9 days 00:00:00
3,1835,612,625,1.05907,1.06683,14.2396,0.007327,2022-05-19 08:00:00,2022-05-23 08:00:00,4 days 00:00:00
4,1971,599,625,1.05159,1.06683,30.03804,0.014492,2022-05-17 04:00:00,2022-05-23 08:00:00,6 days 04:00:00


In [15]:
trades_data['PnL'].sum()

45.617309999999485

In [16]:
trades_data['PnL'].median()

5.958400000000097

In [17]:
trades_data['PnL'].mean()

4.147028181818135

In [18]:
trades_data['PnL'].min()

-12.523589999999968

In [19]:
trades_data['PnL'].max()

30.038039999999842

In [20]:
trades_data[trades_data['PnL'] < 0.3]

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,-1768,551,556,1.05161,1.05814,-11.54504,-0.00621,2022-05-05 12:00:00,2022-05-06 08:00:00,0 days 20:00:00
1,-609,551,565,1.05161,1.05565,-2.46036,-0.003842,2022-05-05 12:00:00,2022-05-09 16:00:00,4 days 04:00:00
2,1484,565,608,1.05565,1.0475,-12.0946,-0.00772,2022-05-09 16:00:00,2022-05-18 16:00:00,9 days 00:00:00
9,-1839,910,918,1.0134,1.02021,-12.52359,-0.00672,2022-07-26 08:00:00,2022-07-27 16:00:00,1 days 08:00:00
