In [1]:
import pandas as pd
import numpy as np
import pandas_ta as ta
from itertools import product

In [2]:
class Backtester():
    '''
    Performs backtesting for a given data set and strategy.
    
    Attributes:
    ----------
    filepath: str
        local filepath of the data set (csv-file)
    symbol: str
        ticker symbol (instrument) to be back tested
    start: str
        start date for data import
    end: str
        end date for data import
    .............
    tc: float
        proportional trading costs per trade
    large_window: int
        to calculate moving average price and volume for reference
    short_window: int
        to calculate rsi
    delta: float
        to define sell and buy conditions
    wait_time: int
        max time to wait before sell
    price_stdlow: float
        price standard deviation lower bound
    volume_rsilow: float
        volume rsi lower bound
    volume_rsihi: float
        volumer rsi upper bound
    price_rsihi: float
        price rsi upper bound
        
    Methods:
    ----------
    get_data:
        imports the data
    prepare_data:
        prepares data, creates some additional features
    backtest:
        runs a backtesting strategy and gives return%
    optimize strategy:
        optimizes the return performing a grid search over the parameter space
    find best strategy:
        gives the best strategy found, prints its performance
        
    '''
    
    def __init__(self, filepath, symbol, start, end, tc):
        self.filepath = filepath
        self.symbol = symbol
        self.start = start
        self.end = end
        self.tc = tc
        self.get_data()
        
    def __repr__(self):
        return "Backtester(symbol = {}, start = {}, end = {})" \
            .format(self.symbol, self.start, self.end)
    
    def get_data(self):
        ''' imports data
        '''
        raw = pd.read_csv(self.filepath, index_col = 'date')
        raw = raw.iloc[::-1] # careful here - apply only if the date is backward
        raw = raw.loc[self.start:self.end].copy()
        self.df0 = raw
        #print(self.df0)
        
    def prepare_data(self, large_window = None, short_window = None):
        '''prepares data for backtesting
        '''
        #print(large_window)
        df = self.df0.copy()
        df.rename(columns = {'Volume USD': 'volume'}, inplace = True)
        
        #references
        df['rolling_avg_price'] = df['close'].rolling(large_window).mean()
        df['rolling_avg_vol'] = df['volume'].rolling(large_window).mean()
        
        # positions
        df['price_return'] = ((df['close'] - df['close'].shift(periods = 1))/df['close'].shift(periods = 1))*100
        df['volume_return'] = ((df['volume'] - df['volume'].shift(periods = 1))/df['volume'].shift(periods = 1))*100
        df['relative_price'] = ((df['close'] - df['rolling_avg_price'])/df['rolling_avg_price'])*100
        df['relative_vol'] = ((df['volume'] - df['rolling_avg_vol'])/df['rolling_avg_vol'])*100
        
        # momentum
        df['price_rsi'] = ta.rsi(df['close'], length=short_window)
        df['volume_rsi'] = ta.rsi(df['volume'], length=short_window)
        
        # volatility
        df['price_std'] = (ta.stdev(df['close'], timeperiod=short_window)/df['rolling_avg_price'])*100
        df['volume_std'] = (ta.stdev(df['volume'], timeperiod=short_window)/df['rolling_avg_vol'])*100
        df['recent_vlt'] = ((df['high'] - df['low'])/df['rolling_avg_price'])*100
    
        # filter extreme values
        df = df.loc[df.volume > 100]
        df = df.loc[df.volume_return < 500000]
        df = df.loc[df.volume_std < 500000]
        df.dropna(inplace = True)
        
        self.df = df
        
    def backtest(self, params):
        large_window, short_window, delta, wait_time, price_stdlow, volume_rsilow, \
                    volume_rsihi, price_rsihi, alpha = \
                    int(params[0]), int(params[1]), params[2], int(params[3]),\
                    params[4], params[5], params[6], params[7], params[8]
        
        self.prepare_data(large_window = large_window, short_window = short_window)
        df = self.df.copy()
        #print(df)
        tc = self.tc
        #define strategy, calulate returns
        return_margin = 1+delta
        loss_margin = 1-delta/alpha
    
        relative_price = df.relative_price.values
        relative_vol = df.relative_vol.values
        price_return = df.price_return.values
        volume_return = df.volume_return.values

        price_rsi = df.price_rsi.values
        volume_rsi = df.volume_rsi.values

        price_std = df.price_std.values
        volume_std = df.volume_std.values
        recent_vlt = df.recent_vlt.values
        
        # upper trend starting points using different conditions
        break_out_index0 = []  
        for i in range(1,len(relative_price)):
            if (relative_price[i-1] <= 0.0 and relative_price[i]> 0.0 and price_std[i]>price_stdlow 
                    and volume_rsilow<volume_rsi[i]<volume_rsihi and price_rsi[i]<price_rsihi):
                break_out_index0.append(i)
        #print(len(break_out_index0))
    
        break_out_index = [break_out_index0[0]]
        
        for i in range(len(break_out_index0)):
            if break_out_index0[i] - break_out_index[-1] > wait_time:
                break_out_index.append(break_out_index0[i])
        #print(len(break_out_index))
        
        
            # calculate profits
        principle = df.close.values[break_out_index[0]]
        close = df['close'].values
        label_column = len(close)*[np.nan]
        break_out_index = [i for i in break_out_index if i-wait_time >= 0]
        profit = 0
        
        
        for i in range(len(close)-wait_time):
            if i in break_out_index:
                #print(i)
                present_price = close[i]
                future_price_seq = close[i+1:i+wait_time+1]
                label = 0
                for index, elem in enumerate(future_price_seq):
                    if elem > present_price*return_margin:
                        label = 1
                        break
                    if elem < present_price*loss_margin:
                        break
                label_column[i] = label
            
                profit += ( (elem-present_price) - tc*present_price )
            
        wins = label_column.count(1)
        losses = label_column.count(0)
    
        self.profit = profit/principle*100
        #profit = self.profit 
        print(self.profit)
        return self.profit
        
    def optimize_strategy(self, para_range = None):
        '''finds returns for all possible 
        combinations'''
        large_window_range, short_window_range, delta_range, \
        wait_time_range, price_stdlow_range, volume_rsilow_range, \
        volume_rsihi_range, price_rsihi_range, alpha_range = para_range[0], \
        para_range[1], para_range[2], para_range[3], para_range[4], \
        para_range[5], para_range[6], para_range[7], para_range[8]
        
        combinations = list(product(large_window_range, short_window_range, delta_range, \
                                    wait_time_range, price_stdlow_range, volume_rsilow_range, \
                                    volume_rsihi_range, price_rsihi_range, alpha_range))
        
        print('No of combinations:{}'.format(len(combinations)))
    
        results = []
        i = 0
        for comb in combinations:
            #print(comb)
            self.backtest(comb)
            result = self.profit
            results.append(result)
            i = i+1
            print(i)
        self.results = results
        #print(results)
        self.combinations = combinations
        
    def find_best_strategy(self, para_range):
        self.optimize_strategy(para_range = para_range_list)
        results = self.results.copy()
        combinations = self.combinations
        
        results_table = pd.DataFrame(data=combinations, columns = ['large_window', 'short_window', \
                                'delta', 'wait_time', 'price_stdlow', 'volume_rsilow', \
                                'volume_rsihi', 'price_rsihi', 'alpha'])
        results_table['returns'] = results
        
        best_results = results_table.nlargest(40, 'returns')
        print(best_results)
        
        best_results.to_csv('best_results.csv')        

In [3]:
filepath = 'BTC-2021min.csv'
symbol = "BTCUSDT"
start = "2021-01-01 00:01:00"
end = "2021-03-02 00:01:00"
tc = 0.0002
#params = (30, 14, 0.014, 275, 0.28, 40, 51, 62)

In [4]:
large_window_range = [30, 40]
short_window_range = [14]
delta_range = [0.010, 0.012, 0.014, 0.016, 0.018] 
wait_time_range = [250, 275, 350] 
price_stdlow_range = np.arange(0.27, 0.29, 0.01)
volume_rsilow_range = [40] 
volume_rsihi_range = [100] #[51]
price_rsihi_range = [100] #[60,62]
alpha_range = [1.0, 1.5, 2.0, 3.0] 

para_range_list = [large_window_range, short_window_range, delta_range, wait_time_range, \
price_stdlow_range, volume_rsilow_range, volume_rsihi_range, price_rsihi_range, alpha_range]

In [5]:
tester = Backtester(filepath = filepath, symbol = symbol, start = start, end = end, tc = tc)

In [6]:
tester.find_best_strategy(para_range = para_range_list) #-- main code to run

No of combinations:240
-33.802959511095295
1
-22.87898974795299
2
-6.942562250612788
3
-16.031177082505344
4
-26.505669177383634
5
-13.959166223217718
6
2.429291808680217
7
-3.8338784605524063
8
-17.641417201314603
9
-1.062837061632394
10
10.697365509752188
11
4.773370374442448
12
-11.206662602609264
13
3.6562854064366803
14
16.227044152645657
15
10.85670353975561
16
-20.763118381458614
17
-9.020405690807971
18
-6.157072024817407
19
-3.4609240272734536
20
-4.220507037393496
21
1.143811914431456
22
5.422844862727983
23
7.539289579312375
24
-12.358132980603997
25
-31.984587017231103
26
-12.524168444128042
27
-5.9182709375355165
28
-13.315897205509689
29
-23.425691031277086
30
-1.8401335455575147
31
8.555824499026409
32
-16.8533576815929
33
-9.477837763595693
34
9.062755167041889
35
16.750420403282003
36
-6.56057896579669
37
-1.5213282228149974
38
13.120340656868503
39
19.92786232047062
40
-25.80757887614824
41
-9.202442534577973
42
-0.3020553553033722
43
0.8647260499426543
44
-5.17030566

In [15]:
params = [30, 14, 0.018, 600, 0.28, 40, 51, 60, 1.5]
tester.backtest(params = params)

-0.5769430511079267


-0.5769430511079267

In [16]:
filepath = 'BTC-2021min.csv'
symbol = "BTCUSDT"
start = "2021-03-01 00:01:00"
end = "2021-05-02 00:01:00"
tc = 0.0002
#params = (30, 14, 0.014, 275, 0.28, 40, 51, 62)

In [17]:
df = pd.read_csv('best_results.csv')
num = df.shape[0]
results = []
for i in range(num):
    #print(df)
    params = df.iloc[i].values[1:-1]
    print(params)
    
    tester = Backtester(filepath = filepath, symbol = symbol, start = start, end = end, tc = tc)
    Return = tester.backtest(params = params)
    results.append(Return)
    
print(results)
print(df)

[3.0e+01 1.4e+01 1.8e-02 3.5e+02 2.8e-01 4.0e+01 1.0e+02 1.0e+02 2.0e+00]
8.134853949659734
[3.0e+01 1.4e+01 1.8e-02 3.5e+02 2.8e-01 4.0e+01 1.0e+02 1.0e+02 3.0e+00]
10.817850081475083
[3.0e+01 1.4e+01 1.8e-02 3.5e+02 2.8e-01 4.0e+01 1.0e+02 1.0e+02 1.5e+00]
20.891188451843476
[3.00e+01 1.40e+01 1.20e-02 2.75e+02 2.80e-01 4.00e+01 1.00e+02 1.00e+02
 3.00e+00]
7.986329534031117
[3.0e+01 1.4e+01 1.6e-02 3.5e+02 2.8e-01 4.0e+01 1.0e+02 1.0e+02 1.5e+00]
7.482385694820228
[3.0e+01 1.4e+01 1.6e-02 3.5e+02 2.8e-01 4.0e+01 1.0e+02 1.0e+02 3.0e+00]
14.185732309566726
[3.00e+01 1.40e+01 1.80e-02 2.75e+02 2.80e-01 4.00e+01 1.00e+02 1.00e+02
 3.00e+00]
14.755329811713807
[3.00e+01 1.40e+01 1.20e-02 2.75e+02 2.70e-01 4.00e+01 1.00e+02 1.00e+02
 3.00e+00]
5.17712236766474
[3.00e+01 1.40e+01 1.00e-02 2.75e+02 2.80e-01 4.00e+01 1.00e+02 1.00e+02
 2.00e+00]
9.487947197234323
[3.00e+01 1.40e+01 1.60e-02 2.75e+02 2.80e-01 4.00e+01 1.00e+02 1.00e+02
 3.00e+00]
20.600529237405766
[3.00e+01 1.40e+01 1.40e-0

In [18]:
df['forward_test_returns'] = results

In [19]:
df

Unnamed: 0.1,Unnamed: 0,large_window,short_window,delta,wait_time,price_stdlow,volume_rsilow,volume_rsihi,price_rsihi,alpha,returns,forward_test_returns
0,118,30,14,0.018,350,0.28,40,100,100,2.0,26.597509,8.134854
1,119,30,14,0.018,350,0.28,40,100,100,3.0,22.637725,10.81785
2,117,30,14,0.018,350,0.28,40,100,100,1.5,21.398143,20.891188
3,39,30,14,0.012,275,0.28,40,100,100,3.0,19.927862,7.98633
4,93,30,14,0.016,350,0.28,40,100,100,1.5,19.501049,7.482386
5,95,30,14,0.016,350,0.28,40,100,100,3.0,18.282474,14.185732
6,111,30,14,0.018,275,0.28,40,100,100,3.0,17.927115,14.75533
7,35,30,14,0.012,275,0.27,40,100,100,3.0,16.75042,5.177122
8,14,30,14,0.01,275,0.28,40,100,100,2.0,16.227044,9.487947
9,87,30,14,0.016,275,0.28,40,100,100,3.0,16.056408,20.600529


In [20]:
## The table shows that my algorithm has 'overfitted' the data

In [21]:
## Anyway, I am going to implement this
df_implemented = df.iloc[2]
df_implemented

df_implemented.to_csv('df_implemented.csv')