In [1]:
import sys 
import os  
import json
import datetime as dt 
import pandas as pd 
import numpy as np 
import talib
import sqlite3 as sql
from itertools import repeat
from p_tqdm import p_umap
sys.path.append('../')
from backtester import * 
from fxcmtoken import my_assets

In [2]:
# Show each configured instrument next to its position in `my_assets`,
# so later cells can pick a ticker by index.
for position, asset in enumerate(my_assets):
    print(f"{position} {asset!s}")

0 EUR/USD
1 USD/JPY
2 GBP/USD
3 USD/CHF
4 AUD/USD
5 USD/CAD
6 NZD/USD
7 AUS200
8 ESP35
9 EUSTX50
10 FRA40
11 GER30
12 HKG33
13 JPN225
14 NAS100
15 SPX500
16 UK100
17 US30
18 Copper
19 NGAS
20 UKOil
21 USOil
22 XAU/USD
23 XAG/USD


In [3]:
# Load the candles of the first configured instrument and derive mid prices.
freq = 'H1'
ticker = my_assets[0]
print(ticker)

conn = sql.connect(f'../PriceData/PriceData_{freq}.db')
try:
    # The table name is interpolated because SQL parameters cannot bind
    # identifiers; `ticker` comes from the project's own asset list.
    data_sql = pd.read_sql(f"SELECT * FROM '{ticker}'", conn, parse_dates=['date'], index_col=['date'])
finally:
    # Release the SQLite handle even if the query raises.
    conn.close()

# Mid price for each OHLC field = mean of the bid and ask columns.
for col in ['open', 'close', 'high', 'low']:
    data_sql[col] = data_sql[['bid'+col, 'ask'+col]].mean(axis=1)

data_sql.tail()

EUR/USD


Unnamed: 0_level_0,bidopen,bidclose,bidhigh,bidlow,askopen,askclose,askhigh,asklow,tickqty,open,close,high,low
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-07-29 16:00:00,1.02007,1.01963,1.02102,1.01765,1.02021,1.01975,1.02117,1.01778,14702,1.02014,1.01969,1.021095,1.017715
2022-07-29 17:00:00,1.01962,1.02091,1.02136,1.0193,1.01975,1.02107,1.02154,1.01942,9406,1.019685,1.02099,1.02145,1.01936
2022-07-29 18:00:00,1.02091,1.02193,1.02199,1.0206,1.02106,1.02207,1.02212,1.02073,8432,1.020985,1.022,1.022055,1.020665
2022-07-29 19:00:00,1.02194,1.02203,1.02222,1.02118,1.02207,1.02215,1.02234,1.02131,8124,1.022005,1.02209,1.02228,1.021245
2022-07-29 20:00:00,1.02205,1.02254,1.02265,1.0216,1.02218,1.02279,1.02287,1.02177,4079,1.022115,1.022665,1.02276,1.021685


In [4]:
def get_mama_return(n_period, data, freq, split_date):
    """Backtest a MAMA/FAMA crossover on the Hilbert trendline for one parameter value.

    The signal is +1 where MAMA is above FAMA and -1 otherwise; both lines are
    computed on ``talib.HT_TRENDLINE(data.close)``.

    Parameters
    ----------
    n_period : float
        Passed to ``talib.MAMA`` as ``fastlimit``; ``slowlimit`` is ``n_period / 10``.
    data : pandas.DataFrame
        Price frame with a ``close`` column and a datetime index. It is not
        modified: all work happens on a private copy.
    freq : str
        Forwarded unchanged to ``IterativeBacktester``.
    split_date : str or datetime-like
        Anything ``pd.Timestamp`` accepts. The train window is the label slice
        from 7 days before to 1 day before ``split_date``; the test window is
        everything from ``split_date`` onwards.

    Returns
    -------
    tuple
        ``(n_period, train_total_return, test_total_return)`` where the returns
        are read from ``return_df.loc['TotalReturn', 'Portfolio']``.
    """
    # The caller hands the same frame to every parameter value, so adding
    # columns / dropping rows on it would leak state between evaluations.
    data = data.copy()

    data['ht'] = talib.HT_TRENDLINE(data.close)
    data.dropna(axis=0, inplace=True)  # in-place is safe: `data` is our own copy
    data['mama'], data['fama'] = talib.MAMA(data.ht, fastlimit=n_period, slowlimit=n_period/10)
    data.dropna(axis=0, inplace=True)
    data['signals'] = np.where(data.mama > data.fama, 1, -1)

    split_ts = pd.Timestamp(split_date)
    train = data.loc[split_ts - dt.timedelta(days=7):split_ts - dt.timedelta(days=1)]
    test = data.loc[split_ts:]

    train_backtest = IterativeBacktester(data=train, signals=train.signals, freq=freq)
    train_backtest.backtest(progress_bar=False)

    test_backtest = IterativeBacktester(data=test, signals=test.signals, freq=freq)
    test_backtest.backtest(progress_bar=False)

    return (n_period,
            train_backtest.return_df.loc['TotalReturn', 'Portfolio'],
            test_backtest.return_df.loc['TotalReturn', 'Portfolio'])

# ------------------------------------------------------------------

def get_tsf_return(n_period, data, freq, split_date):
    """Backtest a trendline-vs-forecast signal for one TSF period.

    The signal is +1 where ``talib.HT_TRENDLINE(data.close)`` is above
    ``talib.TSF`` of that trendline and -1 otherwise.

    Parameters
    ----------
    n_period : int
        Time period passed to ``talib.TSF``.
    data : pandas.DataFrame
        Price frame with a ``close`` column and a datetime index. It is not
        modified: all work happens on a private copy.
    freq : str
        Forwarded unchanged to ``IterativeBacktester``.
    split_date : str or datetime-like
        Anything ``pd.Timestamp`` accepts. The train window is the label slice
        from 7 days before to 1 day before ``split_date``; the test window is
        everything from ``split_date`` onwards.

    Returns
    -------
    tuple
        ``(n_period, train_total_return, test_total_return)`` where the returns
        are read from ``return_df.loc['TotalReturn', 'Portfolio']``.
    """
    # The caller hands the same frame to every parameter value, so adding
    # columns / dropping rows on it would leak state between evaluations.
    data = data.copy()

    data['ht'] = talib.HT_TRENDLINE(data.close)
    data['tsf'] = talib.TSF(data.ht, n_period)
    data.dropna(axis=0, inplace=True)  # in-place is safe: `data` is our own copy
    data['signals'] = np.where(data.ht > data.tsf, 1, -1)

    split_ts = pd.Timestamp(split_date)
    train = data.loc[split_ts - dt.timedelta(days=7):split_ts - dt.timedelta(days=1)]
    test = data.loc[split_ts:]

    train_backtest = IterativeBacktester(data=train, signals=train.signals, freq=freq)
    train_backtest.backtest(progress_bar=False)

    test_backtest = IterativeBacktester(data=test, signals=test.signals, freq=freq)
    test_backtest.backtest(progress_bar=False)

    return (n_period,
            train_backtest.return_df.loc['TotalReturn', 'Portfolio'],
            test_backtest.return_df.loc['TotalReturn', 'Portfolio'])

# ----------------------------------------------------------------

def get_slope_return(n_period, data, freq, split_date):
    """Backtest a linear-regression-slope signal for one look-back period.

    The signal is the sign (-1, 0 or +1) of ``talib.LINEARREG_SLOPE`` computed
    on ``talib.HT_TRENDLINE(data.close)``.

    Parameters
    ----------
    n_period : int
        ``timeperiod`` passed to ``talib.LINEARREG_SLOPE``.
    data : pandas.DataFrame
        Price frame with a ``close`` column and a datetime index. It is not
        modified: all work happens on a private copy.
    freq : str
        Forwarded unchanged to ``IterativeBacktester``.
    split_date : str or datetime-like
        Anything ``pd.Timestamp`` accepts. The train window is the label slice
        from 7 days before to 1 day before ``split_date``; the test window is
        everything from ``split_date`` onwards.

    Returns
    -------
    tuple
        ``(n_period, train_total_return, test_total_return)`` where the returns
        are read from ``return_df.loc['TotalReturn', 'Portfolio']``.
    """
    # The caller hands the same frame to every parameter value, so adding
    # columns / dropping rows on it would leak state between evaluations.
    data = data.copy()

    data['ht'] = talib.HT_TRENDLINE(data.close)
    data.dropna(axis=0, inplace=True)  # in-place is safe: `data` is our own copy
    # Vectorised sign; NaN slopes stay NaN and are removed by the next dropna.
    data['signals'] = np.sign(talib.LINEARREG_SLOPE(data.ht, timeperiod=n_period))
    data.dropna(axis=0, inplace=True)

    split_ts = pd.Timestamp(split_date)
    train = data.loc[split_ts - dt.timedelta(days=7):split_ts - dt.timedelta(days=1)]
    test = data.loc[split_ts:]

    train_backtest = IterativeBacktester(data=train, signals=train.signals, freq=freq)
    train_backtest.backtest(progress_bar=False)

    test_backtest = IterativeBacktester(data=test, signals=test.signals, freq=freq)
    test_backtest.backtest(progress_bar=False)

    return (n_period,
            train_backtest.return_df.loc['TotalReturn', 'Portfolio'],
            test_backtest.return_df.loc['TotalReturn', 'Portfolio'])

# -----------------------------------------------------------------------------

def _summarize_backtest(window, freq):
    """Backtest ``window.agg_signals`` on ``window`` and summarise the run.

    Returns ``(total_return, total_days, n_short, n_long, pos_changes)``.
    Raises ``IndexError`` if ``window`` has no rows.
    """
    span = window.index[-1] - window.index[0]

    backtest = IterativeBacktester(data=window, signals=window.agg_signals, freq=freq)
    backtest.backtest(progress_bar=False)

    total_return = backtest.return_df.loc['TotalReturn', 'Portfolio']
    total_days = span.total_seconds() / (60*60*24)

    # A side that never occurs is simply absent from value_counts -> count 0.
    signal_counts = backtest.signals.value_counts()
    n_short = signal_counts.get(-1, 0)
    n_long = signal_counts.get(1, 0)

    # Sum of absolute bar-to-bar signal differences (a -1 <-> +1 flip adds 2).
    pos_changes = backtest.signals.diff(1).abs().sum()

    return total_return, total_days, n_short, n_long, pos_changes


def get_performance(data, split_date, params, freq):
    """Backtest the majority vote of the MAMA, slope and TSF signals.

    Parameters
    ----------
    data : pandas.DataFrame
        Price frame with a ``close`` column and a datetime index. It is not
        modified: all work happens on a private copy.
    split_date : str or datetime-like
        Anything ``pd.Timestamp`` accepts. The train window is the label slice
        from 7 days before to 1 day before ``split_date``; the test window is
        everything from ``split_date`` onwards.
    params : tuple
        ``(mama, slope, tsf)``: ``params[0]`` is the MAMA ``fastlimit``
        (``slowlimit`` is a tenth of it), ``params[1]`` the
        ``LINEARREG_SLOPE`` time period, ``params[2]`` the ``TSF`` period.
    freq : str
        Forwarded unchanged to ``IterativeBacktester``.

    Returns
    -------
    tuple
        ``(params,
        train_ret, train_total_days, train_pos_short, train_pos_long, train_pos_changes,
        test_ret, test_total_days, test_pos_short, test_pos_long, test_pos_changes)``

    Raises
    ------
    IndexError
        If the train or the test window contains no rows.
    """
    data = data.copy()  # never add columns to / drop rows from the caller's frame

    data['ht'] = talib.HT_TRENDLINE(data.close)
    data['mama'], data['fama'] = talib.MAMA(data.ht, fastlimit=params[0], slowlimit=params[0]/10)
    data['slope'] = talib.LINEARREG_SLOPE(data.ht, timeperiod=params[1])
    data['tsf'] = talib.TSF(data.ht, params[2])
    data.dropna(axis=0, inplace=True)  # in-place is safe: `data` is our own copy

    # signals
    data['mama_signals'] = np.where(data.mama > data.fama, 1, -1)
    data['slope_signals'] = np.sign(data.slope)
    data['tsf_signals'] = np.where(data.ht > data.tsf, 1, -1)
    signal_cols = ['mama_signals', 'slope_signals', 'tsf_signals']
    # mode(axis=1) yields one column per tied mode. A row such as (1, 0, -1)
    # produces three columns, which cannot be assigned to a single column.
    # Keep the first mode column: the majority value, or on a full tie the
    # smallest signal value (pandas returns modes sorted ascending).
    data['agg_signals'] = data[signal_cols].mode(axis=1).iloc[:, 0]

    # train/test split
    split_ts = pd.Timestamp(split_date)
    train = data.loc[split_ts - dt.timedelta(days=7):split_ts - dt.timedelta(days=1)]
    test = data.loc[split_ts:]

    train_summary = _summarize_backtest(train, freq)
    test_summary = _summarize_backtest(test, freq)

    # (params, train..., test...), each summary being
    # returns, #days, #short, #long, #posChanges
    return (params, *train_summary, *test_summary)

In [5]:
# saturday = data_sql.resample('W-SAT').last().index

# end_dates = []
# for i in range(len(saturday)-6):
# #     split_date = saturday[i+5]

#     # need enough extra dates to warmup data
#     data_ = data_sql.loc[saturday[i]:saturday[i+6]]
#     end_dates.append(data_.index[-1])
    
# #     train_dat_ = data_.loc[split_date-dt.timedelta(days=7):split_date]
# #     test_dat_ = data_.loc[split_date:]
# #     print(f"warmup: {len(data_.loc[:split_date])}")    
# #     print(f"Train: {len(train_dat_)} --- {train_dat_.index[0]} to {train_dat_.index[-1]}")
# #     print(f"Test: {len(test_dat_)} --- {test_dat_.index[0]} to {test_dat_.index[-1]}")
# #     print("-"*50)

In [8]:
# (min, max, step) -- expanded with np.arange, so `max` itself is excluded
params_ranges = {
    'mama': (0.1, 1, 0.1),
    'slope': (200, 501, 10),
    'tsf': (200, 501, 10)
}

freq = 'H1'


def _best_param(return_fn, param_range, data, freq, split_date):
    """Evaluate ``return_fn`` over ``np.arange(*param_range)`` in parallel and
    return the parameter whose train return (result column 1) is highest.

    p_umap yields results in completion order, so ties on the train return are
    broken by the smaller parameter value to keep the choice reproducible.
    """
    results = p_umap(return_fn,
                     np.arange(param_range[0], param_range[1], param_range[2]),
                     repeat(data.copy(deep=True)),
                     repeat(freq),
                     repeat(split_date),
                     leave=False)
    ranked = pd.DataFrame(results).sort_values(by=[1, 0], ascending=[False, True])
    return ranked.iloc[0, 0]


# ticker -> Series of out-of-sample returns indexed by window end date.
# Collected across ALL tickers; previously the results frame was re-created
# inside the ticker loop, so only the last ticker's column survived.
returns_by_ticker = {}

conn = sql.connect(f'../PriceData/PriceData_{freq}.db')
try:
    # NOTE(review): tqdm_notebook is not imported explicitly in this notebook;
    # presumably it arrives via `from backtester import *` -- confirm.
    for ticker in tqdm_notebook(my_assets, leave=True, desc='Tickers'):
        data_sql = pd.read_sql(f"SELECT * FROM '{ticker}'", conn, parse_dates=['date'], index_col=['date'])

        # Mid price for each OHLC field = mean of the bid and ask columns.
        for col in ['open', 'close', 'high', 'low']:
            data_sql[col] = data_sql[['bid'+col, 'ask'+col]].mean(axis=1)

        saturday = data_sql.resample('W-SAT').last().index
        ticker_returns = {}

        for i in tqdm_notebook(range(len(saturday)-6), leave=False, desc='dates'):
            # Six-week window: the slice starts early so the indicators have
            # warm-up data; the split falls on the sixth Saturday.
            split_date = saturday[i+5]
            data_ = data_sql.loc[saturday[i]:saturday[i+6]].copy(deep=True)
            end_ = data_.index[-1]

            best_mama_period = _best_param(get_mama_return, params_ranges['mama'], data_, freq, split_date)
            best_slope_period = _best_param(get_slope_return, params_ranges['slope'], data_, freq, split_date)
            best_tsf_period = _best_param(get_tsf_return, params_ranges['tsf'], data_, freq, split_date)

            params = (best_mama_period, best_slope_period, best_tsf_period)
            perf = get_performance(data_.copy(deep=True), split_date, params, freq)
            test_ret = perf[6]
            ticker_returns[end_] = test_ret

        returns_by_ticker[ticker] = pd.Series(ticker_returns, dtype=float)
finally:
    # Close the SQLite handle even when the run fails or is interrupted.
    conn.close()

# One column per asset (in my_assets order); rows are the union of all
# tickers' window end dates.
backtest_performances = pd.DataFrame(returns_by_ticker, columns=my_assets)

last_date = backtest_performances.index[-1]

folder = 'backtest_parameters'
fname = f"backtest_params_{last_date.strftime('%b-%d-%Y')}.csv"

os.makedirs(folder, exist_ok=True)
backtest_performances.to_csv(os.path.join(folder, fname))

Tickers:   0%|          | 0/24 [00:00<?, ?it/s]

dates:   0%|          | 0/76 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

KeyboardInterrupt: 