In [1]:
# Import backtesting.py library
from backtesting import Backtest, Strategy
from backtesting.lib import crossover, resample_apply

# Import indicators that are not provided by backtesting.py
import talib

# Import pandas and numpy
import pandas as pd
import numpy as np

# remove warnings that can be ignored
import warnings
warnings.simplefilter("ignore")

# I. Load dataset

In [2]:
# Read the EUR/USD 4-hour candle data from CSV.
# NOTE(review): the frame is named goog_df although the file is EURUSD_H4.csv — consider renaming.
goog_df = pd.read_csv('EURUSD_H4.csv')

In [3]:
# Display the loaded frame (for a long frame pandas shows only the first and last rows)
goog_df

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2006-12-31 20:00,1.31958,1.31985,1.31887,1.31903,185311
1,2007-01-01 00:00,1.31917,1.31936,1.31655,1.31688,173882
2,2007-01-01 04:00,1.31689,1.31943,1.31650,1.31912,176756
3,2007-01-01 08:00,1.31933,1.32095,1.31863,1.31914,916275
4,2007-01-01 12:00,1.31942,1.32080,1.31837,1.32058,348929
...,...,...,...,...,...,...
25583,2022-11-08 00:00,0.99991,1.00054,0.99715,0.99945,115058
25584,2022-11-08 04:00,0.99944,1.00124,0.99851,1.00112,111555
25585,2022-11-08 08:00,1.00110,1.00963,1.00105,1.00672,139008
25586,2022-11-08 12:00,1.00671,1.00827,1.00539,1.00732,64625


In [4]:
# Parse the 'Date' strings into pandas datetimes so they can be filtered by year
# and later used as the frame's index
goog_df['Date'] = pd.to_datetime(goog_df['Date'])

In [5]:
# Keep only the candles dated before 2022; this is the period used for model training.
# The vectorized .dt accessor replaces a per-row Python lambda and selects the same rows.
goog_df = goog_df[goog_df['Date'].dt.year < 2022]

In [6]:
# Use the candle timestamp as the index (the 'Date' column becomes the index).
# Reassigning instead of inplace=True avoids mutating a filtered frame in place.
goog_df = goog_df.set_index('Date')

# II. Backtesting

In [7]:
# backtesting without machine learning
class RsiOscillator(Strategy):
    """RSI crossover strategy.

    Exits: an open long is closed when the RSI crosses above ``upper_bound``;
    otherwise an open short is closed when the RSI crosses below ``lower_bound``.
    Entries: buy when the RSI crosses above 51, otherwise sell when it crosses
    below 49. Every order uses ``size=0.25`` and a stop-loss placed 5% away
    from the latest close.
    """

    # Strategy parameters; the notebook overrides them by keyword when running the backtest
    upper_bound = 70
    lower_bound = 30
    rsi_window = 14

    def init(self):
        """Build the RSI indicators from the close prices."""
        closes = self.data.Close
        period = self.rsi_window

        # Named "daily", but the resample rule used here is '4H'.
        self.daily_rsi = resample_apply('4H', talib.RSI, closes, period)

        # RSI on 'W-FRI' (weekly, Friday-anchored) bars; next() does not read it.
        self.weekly_rsi = resample_apply('W-FRI', talib.RSI, closes, period)

    def next(self):
        """Apply the exit and entry rules on the current bar."""
        rsi = self.daily_rsi
        last_close = self.data.Close[-1]

        # Exit rules: only one of the two crossings is considered per bar,
        # the upper-bound crossing taking precedence.
        if crossover(rsi, self.upper_bound):
            should_exit = self.position.is_long
        elif crossover(self.lower_bound, rsi):
            should_exit = self.position.is_short
        else:
            should_exit = False

        if should_exit:
            self.position.close()

        # Entry rules: stop-loss sits 5% below the close for buys, 5% above for sells.
        if crossover(rsi, 51):
            self.buy(size=0.25, sl=0.95 * last_close)
        elif crossover(49, rsi):
            self.sell(size=0.25, sl=1.05 * last_close)


# 

In [8]:
# Create the backtest: RsiOscillator over goog_df, starting with 10000 in cash
bt = Backtest(goog_df, RsiOscillator, cash = 10000)

In [9]:
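# Parameter search (disabled): grid-search the RSI bounds and window.
# Note: a descending range needs a negative step; range(45,15,2) would be empty.
# stats = bt.optimize(
#     upper_bound = range(55,85,2),
#     lower_bound = range(45,15,-2),
#     rsi_window = [11,12,13,14,17,20,21,22]
# )
# stats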

In [10]:
# Run a single backtest with fixed RSI parameters.
# bt.run takes the strategy parameters directly; bt.optimize is only needed
# when searching over several candidate values.
stats = bt.run(
    upper_bound=70,
    lower_bound=30,
    rsi_window=14,
)
stats

Start                     2006-12-31 20:00:00
End                       2021-12-31 12:00:00
Duration                   5478 days 16:00:00
Exposure Time [%]                   77.984304
Equity Final [$]                 10364.413039
Equity Peak [$]                  10658.518175
Return [%]                            3.64413
Buy & Hold Return [%]              -13.825311
Return (Ann.) [%]                    0.192301
Volatility (Ann.) [%]                3.279667
Sharpe Ratio                         0.058634
Sortino Ratio                        0.086414
Calmar Ratio                         0.014422
Max. Drawdown [%]                   -13.33409
Avg. Drawdown [%]                   -0.798489
Max. Drawdown Duration     3948 days 12:00:00
Avg. Drawdown Duration      106 days 17:00:00
# Trades                                 2443
Win Rate [%]                        42.652476
Best Trade [%]                       7.119034
Worst Trade [%]                     -5.915388
Avg. Trade [%]                    

In [11]:
# Show the strategy instance, including its parameter values, behind these stats
stats['_strategy']

<Strategy RsiOscillator(upper_bound=70,lower_bound=30,rsi_window=14)>

# III. Data Explorations

In [12]:
# Show the individual trades, sorted by entry time
stats['_trades'].sort_values('EntryTime')

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,-1024,499,506,1.35660,1.36250,-6.04160,-0.004349,2007-04-23 08:00:00,2007-04-24 12:00:00,1 days 04:00:00
1,-818,499,523,1.35660,1.36570,-7.44380,-0.006708,2007-04-23 08:00:00,2007-04-27 08:00:00,4 days 00:00:00
2,-137,503,523,1.35680,1.36570,-1.21930,-0.006560,2007-04-24 00:00:00,2007-04-27 08:00:00,3 days 08:00:00
3,-893,503,531,1.35680,1.36492,-7.25116,-0.005985,2007-04-24 00:00:00,2007-04-30 12:00:00,6 days 12:00:00
4,-351,503,558,1.35680,1.36072,-1.37592,-0.002889,2007-04-24 00:00:00,2007-05-06 20:00:00,12 days 20:00:00
...,...,...,...,...,...,...,...,...,...,...
2435,-459,24160,24209,1.12734,1.13848,-5.11326,-0.009882,2021-12-21 12:00:00,2021-12-31 12:00:00,10 days 00:00:00
2434,-341,24163,24209,1.12666,1.13848,-4.03062,-0.010491,2021-12-22 00:00:00,2021-12-31 12:00:00,9 days 12:00:00
2433,-386,24191,24209,1.13029,1.13848,-3.16134,-0.007246,2021-12-28 12:00:00,2021-12-31 12:00:00,3 days 00:00:00
2432,-441,24204,24209,1.13239,1.13848,-2.68569,-0.005378,2021-12-30 16:00:00,2021-12-31 12:00:00,0 days 20:00:00


In [13]:
# Show the results on a plot (possibly limited to around 2000 bars?)
# bt.plot(resample='4H')

In [14]:
# Copy the trade log into its own frame so later edits do not touch stats['_trades'],
# then preview the first rows
trades_data = stats['_trades'].copy()
trades_data.head()

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,-1024,499,506,1.3566,1.3625,-6.0416,-0.004349,2007-04-23 08:00:00,2007-04-24 12:00:00,1 days 04:00:00
1,-818,499,523,1.3566,1.3657,-7.4438,-0.006708,2007-04-23 08:00:00,2007-04-27 08:00:00,4 days 00:00:00
2,-137,503,523,1.3568,1.3657,-1.2193,-0.00656,2007-04-24 00:00:00,2007-04-27 08:00:00,3 days 08:00:00
3,-893,503,531,1.3568,1.36492,-7.25116,-0.005985,2007-04-24 00:00:00,2007-04-30 12:00:00,6 days 12:00:00
4,-351,503,558,1.3568,1.36072,-1.37592,-0.002889,2007-04-24 00:00:00,2007-05-06 20:00:00,12 days 20:00:00


In [15]:
# Total PnL over all trades
trades_data['PnL'].sum()

364.4130384999896

In [16]:
# Median PnL per trade
trades_data['PnL'].median()

-0.5760000000000005

In [17]:
# Mean PnL per trade
trades_data['PnL'].mean()

0.14916620487105592

In [18]:
# Largest single-trade loss (minimum PnL)
trades_data['PnL'].min()

-97.69390400000005

In [19]:
# Largest single-trade profit (maximum PnL)
trades_data['PnL'].max()

77.91554999999994

In [20]:
# Show the trades whose PnL is below 0.3
# NOTE(review): 0.3 differs from the 0.5 cutoff used in win_loss — confirm which is intended
trades_data[trades_data['PnL'] < 0.3]

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration
0,-1024,499,506,1.35660,1.36250,-6.04160,-0.004349,2007-04-23 08:00:00,2007-04-24 12:00:00,1 days 04:00:00
1,-818,499,523,1.35660,1.36570,-7.44380,-0.006708,2007-04-23 08:00:00,2007-04-27 08:00:00,4 days 00:00:00
2,-137,503,523,1.35680,1.36570,-1.21930,-0.006560,2007-04-24 00:00:00,2007-04-27 08:00:00,3 days 08:00:00
3,-893,503,531,1.35680,1.36492,-7.25116,-0.005985,2007-04-24 00:00:00,2007-04-30 12:00:00,6 days 12:00:00
4,-351,503,558,1.35680,1.36072,-1.37592,-0.002889,2007-04-24 00:00:00,2007-05-06 20:00:00,12 days 20:00:00
...,...,...,...,...,...,...,...,...,...,...
2438,-718,24148,24209,1.12371,1.13848,-10.60486,-0.013144,2021-12-19 12:00:00,2021-12-31 12:00:00,12 days 00:00:00
2439,-760,24130,24209,1.12566,1.13848,-9.74320,-0.011389,2021-12-14 16:00:00,2021-12-31 12:00:00,16 days 20:00:00
2440,-803,24125,24209,1.12826,1.13848,-8.20666,-0.009058,2021-12-13 20:00:00,2021-12-31 12:00:00,17 days 16:00:00
2441,-1072,24120,24209,1.12851,1.13848,-10.68784,-0.008835,2021-12-13 00:00:00,2021-12-31 12:00:00,18 days 12:00:00


# IV. Save new data for machine learning

In [21]:
# Classify a trade as a win or a loss from its PnL
def win_loss(pnl, threshold=0.5):
    """Label a trade 'win' or 'loss' from its profit and loss.

    Parameters
    ----------
    pnl : number
        Profit and loss of the trade.
    threshold : number, default 0.5
        Smallest PnL that counts as a win.

    Returns
    -------
    str
        'win' when ``pnl >= threshold``, otherwise 'loss'. A NaN ``pnl``
        yields 'loss' because the comparison is false.
    """
    return 'win' if pnl >= threshold else 'loss'


In [22]:
# Classify a trade as long or short from the sign of its size
def long_short(size):
    """Return 'long' when ``size`` is zero or positive, otherwise 'short'."""
    return 'long' if size >= 0 else 'short'


In [23]:
# Add the win/loss label, derived from each trade's PnL
trades_data['win_loss'] = trades_data['PnL'].apply(win_loss)

In [24]:
# Add the long/short label, derived from the sign of each trade's size
trades_data['long_short'] = trades_data['Size'].apply(long_short)

In [25]:
# Preview five random trades with the new label columns
# (fixed random_state so the preview is the same on every run)
trades_data.sample(5, random_state=42)

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration,win_loss,long_short
1919,711,18837,18880,1.16662,1.16254,-2.90088,-0.003497,2018-08-31 04:00:00,2018-09-11 00:00:00,10 days 20:00:00,loss,long
1300,116,12782,12949,1.24699,1.184659,-7.230338,-0.049985,2014-11-27 12:00:00,2015-01-06 12:00:00,40 days 00:00:00,loss,long
285,-650,3198,3231,1.39443,1.40095,-4.238,-0.004676,2008-12-22 16:00:00,2008-12-30 00:00:00,7 days 08:00:00,loss,short
1602,1072,15859,15863,1.09175,1.09002,-1.85456,-0.001585,2016-10-26 08:00:00,2016-10-27 00:00:00,0 days 16:00:00,loss,long
1365,-1594,13474,13476,1.11266,1.1194,-10.74356,-0.006058,2015-05-05 04:00:00,2015-05-05 12:00:00,0 days 08:00:00,loss,short


In [26]:
# Add calendar features of the trade entry time, using the vectorized .dt accessor:
#   entry_month - month number
#   entry_date  - day of the month
#   entry_day   - weekday number (Monday=0 ... Sunday=6)
#   entry_hour  - hour of the day
trades_data['entry_month'] = trades_data['EntryTime'].dt.month
trades_data['entry_date'] = trades_data['EntryTime'].dt.day
trades_data['entry_day'] = trades_data['EntryTime'].dt.weekday
trades_data['entry_hour'] = trades_data['EntryTime'].dt.hour

In [27]:
# Preview three random trades with the entry-time features
# (fixed random_state so the preview is the same on every run)
trades_data.sample(3, random_state=42)

Unnamed: 0,Size,EntryBar,ExitBar,EntryPrice,ExitPrice,PnL,ReturnPct,EntryTime,ExitTime,Duration,win_loss,long_short,entry_month,entry_date,entry_day,entry_hour
1771,982,17362,17379,1.18109,1.17643,-4.57612,-0.003946,2017-10-01 16:00:00,2017-10-04 12:00:00,2 days 20:00:00,loss,long,10,1,6,16
1882,23,18486,18547,1.17722,1.15941,-0.40963,-0.015129,2018-06-13 12:00:00,2018-06-27 08:00:00,13 days 20:00:00,loss,long,6,13,2,12
2229,938,22183,22218,1.16983,1.17313,3.0954,0.002821,2020-09-29 04:00:00,2020-10-06 20:00:00,7 days 16:00:00,win,long,9,29,1,4


In [28]:
# Drop the columns that are not used as model inputs: trade outcomes that are only
# known after the trade has closed (ExitPrice, PnL, ReturnPct, ExitTime, Duration,
# ExitBar) and raw entry fields already replaced by derived columns or not needed
# (Size, EntryBar, EntryTime).
list_drop = ['Size', 'ReturnPct', 'ExitPrice', 'PnL','ExitTime', 'Duration', 'EntryBar', 'ExitBar', 'EntryTime']
trades_data = trades_data.drop(columns=list_drop)

In [29]:
# Write the labelled trade features to a CSV file (row index omitted)
trades_data.to_csv('trades_data.csv', index=False)