In [1]:
import numpy as np
import pandas as pd

from backtesting import Strategy
from backtesting.lib import crossover

from backtesting import Backtest

from scripts.data_preparation import load_trades_from_csv, get_bar_stats

import pickle



In [2]:
path = 'data/trades_RIU2@FORTS_2022_08_19_2022_08_19.csv'
trades = load_trades_from_csv(path)
# display(trades)
# trades.info()
# display('Duplicates count',trades.duplicated().sum())

In [3]:
resampled = trades.set_index('datetime').groupby(pd.Grouper(freq='1Min'))
# time_bars = get_bar_stats(resampled)
one_minute_bar = get_bar_stats(resampled)
one_minute_bar = one_minute_bar.dropna()

In [4]:
one_minute_bar

Unnamed: 0_level_0,open,high,low,close,vwap,volume,txn
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-08-19 10:00:00,111670.0,111970.0,111300.0,111700.0,0,1455,832
2022-08-19 10:01:00,111700.0,111850.0,111580.0,111670.0,0,731,466
2022-08-19 10:02:00,111670.0,111670.0,111400.0,111520.0,0,715,413
2022-08-19 10:03:00,111530.0,111730.0,111360.0,111430.0,0,636,440
2022-08-19 10:04:00,111450.0,111670.0,111210.0,111250.0,0,1014,563
...,...,...,...,...,...,...,...
2022-08-19 23:45:00,112480.0,112480.0,112450.0,112470.0,0,23,12
2022-08-19 23:46:00,112470.0,112490.0,112450.0,112490.0,0,31,14
2022-08-19 23:47:00,112490.0,112490.0,112430.0,112440.0,0,32,7
2022-08-19 23:48:00,112470.0,112520.0,112430.0,112430.0,0,96,56


Is 1 minute grouping correct? Is minute 10:00:00 consists of all trades from 10:00:00 to 10:00:59.999? Is it the correct way to build bars?

In [5]:
path = 'data/trades_RIU2@FORTS_2022_08_23_2022_08_23.csv'
trades_valid = load_trades_from_csv(path)
# display(trades_valid)
# trades_valid.info()
# display('Duplicates count',trades_valid.duplicated().sum())
resampled_valid = trades_valid.set_index('datetime').groupby(pd.Grouper(freq='1Min'))
one_minute_bar_valid = get_bar_stats(resampled_valid)
one_minute_bar_valid = one_minute_bar_valid.dropna()

In [6]:
data = pd.concat([one_minute_bar,one_minute_bar_valid])
data.columns = ['Open','High','Low','Close','VWAP','Volume','Txn']

In [7]:
data

Unnamed: 0_level_0,Open,High,Low,Close,VWAP,Volume,Txn
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-08-19 10:00:00,111670.0,111970.0,111300.0,111700.0,0,1455,832
2022-08-19 10:01:00,111700.0,111850.0,111580.0,111670.0,0,731,466
2022-08-19 10:02:00,111670.0,111670.0,111400.0,111520.0,0,715,413
2022-08-19 10:03:00,111530.0,111730.0,111360.0,111430.0,0,636,440
2022-08-19 10:04:00,111450.0,111670.0,111210.0,111250.0,0,1014,563
...,...,...,...,...,...,...,...
2022-08-23 23:45:00,114800.0,114820.0,114800.0,114820.0,0,15,8
2022-08-23 23:46:00,114820.0,114850.0,114820.0,114850.0,0,13,7
2022-08-23 23:47:00,114840.0,114880.0,114820.0,114880.0,0,29,19
2022-08-23 23:48:00,114820.0,114850.0,114820.0,114820.0,0,4,4


https://kernc.github.io/backtesting.py/doc/examples/Trading%20with%20Machine%20Learning.html

Let's create a class.

We can use pretrained models, train model on first n bars, train model iteratively.

Let's hypothesize that intraday activity has a limited number of states with different degrees of intensity. Whatever states and whatever nature of intensity. I'm simply trying to make a case for a pretrained model.  
So let's use pretrained model.

In [8]:
def SMA(values, n):
    """
    Return simple moving average of `values`, at
    each step taking into account `n` previous values.
    """
    return pd.Series(values).rolling(n).mean()

In [13]:
class MLStrategy(Strategy):
    up_threshold = 0.1
    down_threshold = 0.9
    
    tp = 100
    sl = 50
    
    def init(self):
        with open('model/logreg.pkl','rb') as f:
            self.up_model = pickle.load(f)
            
        with open('model/lgbm_model.pkl','rb') as f:
            self.down_model = pickle.load(f)
        
    def next(self):
        # print(self.data.df)
        data = self.data.df.iloc[-1:]
        # print(data)
        # print(data.columns)
        data.columns = ['open','high','low','close','vwap','volume','txn']
        # print(data)

        predict_proba_up = self.down_model.predict_proba(data)[:,1]
        # print(predict_proba_up)
        predict_proba_down = self.down_model.predict_proba(data)[:,1]
        # print(predict_proba_down)
        
        forecast = predict_proba_up > self.up_threshold and predict_proba_down < self.down_threshold
        # forecast = 1
        # print(forecast)
        
        tp = self.data.Close[-1] + 3 * self.tp
        sl = self.data.Close[-1] - self.sl
        if forecast and not self.position.is_long:
            print(self.data.df.iloc[-1])
            self.buy(size=1,tp=tp,sl=sl)
            

In [14]:
bt = Backtest(data, MLStrategy, cash=10_000_000, commission=.002)
stats = bt.run()
stats

Open      111700.0
High      111850.0
Low       111580.0
Close     111670.0
VWAP           0.0
Volume       731.0
Txn          466.0
Name: 2022-08-19 10:01:00, dtype: float64
Open      111670.0
High      111670.0
Low       111400.0
Close     111520.0
VWAP           0.0
Volume       715.0
Txn          413.0
Name: 2022-08-19 10:02:00, dtype: float64
Open      111450.0
High      111670.0
Low       111210.0
Close     111250.0
VWAP           0.0
Volume      1014.0
Txn          563.0
Name: 2022-08-19 10:04:00, dtype: float64
Open      111240.0
High      111240.0
Low       110930.0
Close     111020.0
VWAP           0.0
Volume      1533.0
Txn          957.0
Name: 2022-08-19 10:05:00, dtype: float64
Open      111020.0
High      111100.0
Low       110880.0
Close     111060.0
VWAP           0.0
Volume       823.0
Txn          509.0
Name: 2022-08-19 10:06:00, dtype: float64
Open      111000.0
High      111010.0
Low       110880.0
Close     110980.0
VWAP           0.0
Volume       520.0
Txn        

Start                     2022-08-19 10:00:00
End                       2022-08-23 23:49:00
Duration                      4 days 13:49:00
Exposure Time [%]                   97.463538
Equity Final [$]                   9933418.08
Equity Peak [$]                    10000000.0
Return [%]                          -0.665819
Buy & Hold Return [%]                2.730528
Return (Ann.) [%]                  -34.379309
Volatility (Ann.) [%]                     NaN
Sharpe Ratio                              NaN
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                   -0.666619
Avg. Drawdown [%]                   -0.666619
Max. Drawdown Duration        4 days 13:48:00
Avg. Drawdown Duration        4 days 13:48:00
# Trades                                  321
Win Rate [%]                        19.003115
Best Trade [%]                       0.759421
Worst Trade [%]                      -0.28851
Avg. Trade [%]                    

In [None]:
bt.plot()

In [None]:
%%time

stats = bt.optimize(n1=range(5, 30, 5),
                    n2=range(10, 70, 5),
                    maximize='Equity Final [$]',
                    constraint=lambda param: param.n1 < param.n2)
stats

In [None]:
stats._strategy

In [None]:
bt.plot(plot_volume=False, plot_pl=False)

In [None]:
stats.tail()

In [None]:
stats['_equity_curve']  # Contains equity/drawdown curves. DrawdownDuration is only defined at ends of DD periods.

In [None]:
stats['_trades']  # Contains individual trade data