In [32]:
import os
import sys 
import gc
import json
import pandas as pd 
import numpy as np 
import talib
import sqlite3 as sql
from itertools import repeat
from p_tqdm import p_map, p_umap, t_map
sys.path.append('../')
from backtester import * 
from fxcmtoken import my_assets

In [2]:
my_assets

['EUR/USD',
 'USD/JPY',
 'GBP/USD',
 'USD/CHF',
 'AUD/USD',
 'USD/CAD',
 'NZD/USD',
 'Bund',
 'AUS200',
 'ESP35',
 'EUSTX50',
 'FRA40',
 'GER30',
 'HKG33',
 'JPN225',
 'NAS100',
 'SPX500',
 'UK100',
 'US30',
 'Copper',
 'NGAS',
 'UKOil',
 'USOil',
 'XAU/USD',
 'XAG/USD']

In [3]:
# Bar timeframe. It selects the price database file below and is also read as a
# module-level global by the optimisation/backtest functions defined later.
freq = 'H1'
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
sql_path = f'/home/kachain/python_projects/algorithmic_trading/fxcmpy_trader/PriceData_{freq}.db'
# The connection stays open because the next cell reads from `conn`.
conn = sql.connect(sql_path)
# conn.close()  # run manually once `conn` is no longer needed

In [4]:
# Load the first configured instrument and derive mid prices as the row-wise
# mean of the bid and ask quote columns.
ticker = my_assets[0]
query = f"SELECT * FROM '{ticker}'"
data_sql = pd.read_sql(query, conn, parse_dates=['date'], index_col=['date'])
for col in ('open', 'close', 'high', 'low'):
    quote_cols = [f'bid{col}', f'ask{col}']
    data_sql[col] = data_sql[quote_cols].mean(axis=1)

In [5]:
# Search grid per indicator. Each tuple is (start, stop, step) and is expanded
# with np.arange by the get_*_optim functions, so `stop` is exclusive.
params_ranges = {
        'mama': (0.1, 1, 0.1),    # MAMA fast limit; the slow limit is derived as fast / 10
        'slope': (10, 101, 10),   # LINEARREG_SLOPE time period
        'tsf': (10, 101, 10)      # TSF time period
    }

In [6]:
def get_strategy_return(data, signal, param, freq):
    """Backtest a single signal series and report its total portfolio return.

    Parameters
    ----------
    data
        Price data handed to ``IterativeBacktester``.
    signal
        Signal series to trade, passed as the backtester's ``signals``.
    param
        Parameter value that produced ``signal``. It is returned untouched so
        the caller can match results back to parameters.
    freq
        Forwarded to ``IterativeBacktester``.

    Returns
    -------
    tuple
        ``(param, total_return)`` where ``total_return`` is the
        ``('TotalReturn', 'Portfolio')`` cell of the backtester's ``return_df``.
    """
    bt = IterativeBacktester(data=data, signals=signal, freq=freq)
    bt.backtest(progress_bar=False)
    total_return = bt.return_df.loc['TotalReturn', 'Portfolio']
    return param, total_return

def get_optim_param(data, signal, param, freq, start_date, end_date):
    """Pick the best-performing parameter for one date window.

    Every ``(signal[i], param[i])`` pair is backtested through
    ``get_strategy_return`` (fanned out with ``p_umap``) and the parameter with
    the highest total return is selected.

    Parameters
    ----------
    data
        Price data for the window; a deep copy is what gets handed to the workers.
    signal
        Candidate signal series, aligned one-to-one with ``param``.
    param
        Parameter values that produced the corresponding entries of ``signal``.
    freq
        Forwarded to ``get_strategy_return``.
    start_date, end_date
        Window bounds, returned unchanged as the key of the result.

    Returns
    -------
    tuple
        ``((start_date, end_date), optim_param)``.
    """
    res = p_umap(get_strategy_return, repeat(data.copy(deep=True)), signal, param, repeat(freq), leave=False)

    # p_umap is an *unordered* map, so the row order of `res` depends on worker
    # scheduling. Ranking on the return alone would make tied returns resolve
    # differently from run to run; break ties on the parameter value (smallest
    # wins) so the selection is reproducible. NaN returns still sort last.
    ranked = pd.DataFrame(res).sort_values(by=[1, 0], ascending=[False, True])
    optim_param = ranked.iloc[0, 0]

    return (start_date, end_date), optim_param

In [7]:
# MAMA

def get_mama_optim(input_data, param_range):
    """Find, per weekly window, the MAMA fast limit with the best backtest return.

    Parameters
    ----------
    input_data
        Price frame with a ``close`` column and a datetime index; it is copied,
        never modified.
    param_range
        ``(start, stop, step)`` expanded with ``np.arange`` into the candidate
        fast limits. The slow limit is always ``fast / 10``.

    Returns
    -------
    DataFrame
        Indexed by ``dates`` (``(window_start, window_end)`` tuples) with a
        single ``mama_param`` column holding the selected fast limit.

    Notes
    -----
    The bar frequency passed to ``get_optim_param`` is the module-level ``freq``.
    """
    fast_limits = np.arange(param_range[0], param_range[1], param_range[2])

    frame = input_data.copy(deep=True)
    frame['ht'] = talib.HT_TRENDLINE(frame.close)
    frame.dropna(axis=0, inplace=True)

    # One mama/fama/signal column triple per candidate: +1 when MAMA is above
    # FAMA, -1 otherwise.
    for limit in fast_limits:
        tag = f"{limit:.1f}"
        mama, fama = talib.MAMA(frame.ht, fastlimit=limit, slowlimit=limit/10)
        frame[f"mama_{tag}"] = mama
        frame[f"fama_{tag}"] = fama
        frame[f'signals_{tag}'] = np.where(mama > fama, 1, -1)
    frame.dropna(axis=0, inplace=True)

    # Consecutive 'W-Sat' resample labels delimit the weekly windows.
    week_edges = frame.resample('W-Sat').last().index
    windows = list(zip(week_edges[:-1], week_edges[1:]))
    signal_cols = [f"signals_{limit:.1f}" for limit in fast_limits]

    datasets, signals, params = [], [], []
    for week_start, week_end in windows:
        window = frame.loc[week_start:week_end, :]
        datasets.append(window.copy(deep=True))
        signals.append([window.loc[:, col] for col in signal_cols])
        params.append(list(fast_limits))
    start_dates = [week_start for week_start, _ in windows]
    end_dates = [week_end for _, week_end in windows]

    optim_rows = t_map(get_optim_param, datasets, signals, params, repeat(freq), start_dates, end_dates, leave=False)
    mama_optim = (
        pd.DataFrame(optim_rows)
        .set_axis(['dates', 'mama_param'], axis=1)
        .set_index('dates')
    )

    gc.collect()

    return mama_optim

In [10]:
# Slope

def get_slope_optim(input_data, param_range):
    """Find, per weekly window, the linear-regression slope period with the best return.

    Parameters
    ----------
    input_data
        Price frame with a ``close`` column and a datetime index; it is copied,
        never modified.
    param_range
        ``(start, stop, step)`` expanded with ``np.arange`` into the candidate
        time periods.

    Returns
    -------
    DataFrame
        Indexed by ``dates`` (``(window_start, window_end)`` tuples) with a
        single ``slope_param`` column holding the selected period.

    Notes
    -----
    The bar frequency passed to ``get_optim_param`` is the module-level ``freq``.
    """
    periods = np.arange(param_range[0], param_range[1], param_range[2])

    frame = input_data.copy(deep=True)
    frame['ht'] = talib.HT_TRENDLINE(frame.close)
    frame.dropna(axis=0, inplace=True)

    # The signal is the sign of the regression slope of the HT trendline.
    for period in periods:
        slope = talib.LINEARREG_SLOPE(frame.ht, timeperiod=period)
        frame[f'signals_{period:.0f}'] = np.sign(slope)
    frame.dropna(axis=0, inplace=True)

    # Consecutive 'W-Sat' resample labels delimit the weekly windows.
    week_edges = frame.resample('W-Sat').last().index
    windows = list(zip(week_edges[:-1], week_edges[1:]))
    signal_cols = [f"signals_{period:.0f}" for period in periods]

    datasets, signals, params = [], [], []
    for week_start, week_end in windows:
        window = frame.loc[week_start:week_end, :]
        datasets.append(window.copy(deep=True))
        signals.append([window.loc[:, col] for col in signal_cols])
        params.append(list(periods))
    start_dates = [week_start for week_start, _ in windows]
    end_dates = [week_end for _, week_end in windows]

    optim_rows = t_map(get_optim_param, datasets, signals, params, repeat(freq), start_dates, end_dates, leave=False)
    slope_optim = (
        pd.DataFrame(optim_rows)
        .set_axis(['dates', 'slope_param'], axis=1)
        .set_index('dates')
    )

    gc.collect()

    return slope_optim

In [13]:
# TSF

def get_tsf_optim(input_data, param_range):
    """Find, per weekly window, the TSF (time series forecast) period with the best return.

    Parameters
    ----------
    input_data
        Price frame with a ``close`` column and a datetime index; it is copied,
        never modified.
    param_range
        ``(start, stop, step)`` expanded with ``np.arange`` into the candidate
        time periods.

    Returns
    -------
    DataFrame
        Indexed by ``dates`` (``(window_start, window_end)`` tuples) with a
        single ``tsf_param`` column holding the selected period.

    Notes
    -----
    The bar frequency passed to ``get_optim_param`` is the module-level ``freq``.
    """
    periods = np.arange(param_range[0], param_range[1], param_range[2])

    frame = input_data.copy(deep=True)
    frame['ht'] = talib.HT_TRENDLINE(frame.close)
    frame.dropna(axis=0, inplace=True)

    # Signal is +1 while the HT trendline sits above its forecast, -1 otherwise.
    for period in periods:
        tsf_col = f'tsf_{period:.0f}'
        frame[tsf_col] = talib.TSF(frame.ht, period)
        frame[f'signals_{period:.0f}'] = np.where(frame.ht > frame[tsf_col], 1, -1)
    frame.dropna(axis=0, inplace=True)

    # Consecutive 'W-Sat' resample labels delimit the weekly windows.
    week_edges = frame.resample('W-Sat').last().index
    windows = list(zip(week_edges[:-1], week_edges[1:]))
    signal_cols = [f"signals_{period:.0f}" for period in periods]

    datasets, signals, params = [], [], []
    for week_start, week_end in windows:
        window = frame.loc[week_start:week_end, :]
        datasets.append(window.copy(deep=True))
        signals.append([window.loc[:, col] for col in signal_cols])
        params.append(list(periods))
    start_dates = [week_start for week_start, _ in windows]
    end_dates = [week_end for _, week_end in windows]

    optim_rows = t_map(get_optim_param, datasets, signals, params, repeat(freq), start_dates, end_dates, leave=False)
    tsf_optim = (
        pd.DataFrame(optim_rows)
        .set_axis(['dates', 'tsf_param'], axis=1)
        .set_index('dates')
    )

    gc.collect()

    return tsf_optim

In [14]:
# Run the weekly parameter search for each indicator on the loaded instrument.
# Each result is a frame indexed by (window_start, window_end) tuples.
mama_optim = get_mama_optim(data_sql ,params_ranges['mama'])
slope_optim = get_slope_optim(data_sql, params_ranges['slope'])
tsf_optim = get_tsf_optim(data_sql, params_ranges['tsf'])

                                               

In [16]:
# Line up the three per-window optima side by side. The outer join keeps windows
# that only some indicators cover (they show up as NaN for the others).
best_parameters = pd.DataFrame()
for optim in (mama_optim, slope_optim, tsf_optim):
    best_parameters = best_parameters.join(optim, how='outer')

In [17]:
best_parameters

Unnamed: 0_level_0,mama_param,slope_param,tsf_param
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(2021-01-30 00:00:00, 2021-02-06 00:00:00)",0.4,,
"(2021-02-06 00:00:00, 2021-02-13 00:00:00)",0.3,50.0,80.0
"(2021-02-13 00:00:00, 2021-02-20 00:00:00)",0.7,10.0,40.0
"(2021-02-20 00:00:00, 2021-02-27 00:00:00)",0.3,20.0,20.0
"(2021-02-27 00:00:00, 2021-03-06 00:00:00)",0.1,90.0,70.0
...,...,...,...
"(2022-07-16 00:00:00, 2022-07-23 00:00:00)",0.3,10.0,80.0
"(2022-07-23 00:00:00, 2022-07-30 00:00:00)",0.3,40.0,10.0
"(2022-07-30 00:00:00, 2022-08-06 00:00:00)",0.9,60.0,20.0
"(2022-08-06 00:00:00, 2022-08-13 00:00:00)",0.4,20.0,70.0


In [18]:
# Bundle each window's three optima into one list: [mama, slope, tsf].
param_list = best_parameters[['mama_param', 'slope_param', 'tsf_param']].values.tolist()
best_parameters['params'] = param_list
# Lag by one row so every window carries the parameters found on the previous
# window, then drop rows where any lagged value is missing.
best_parameters_shift = best_parameters.shift(1).dropna(axis=0)['params'].copy(deep=True)

In [37]:
# Parallel arrays for the map below: (window_start, window_end) tuples and the
# matching [mama, slope, tsf] parameter lists.
dates = best_parameters_shift.index.values
parameters = best_parameters_shift.values

In [90]:
# params --> (mama, slope, tsf)
# params.apply(func) --> get returns

def get_aggregate_return(data, ds, params):
    """Backtest the majority vote of the MAMA, slope and TSF signals over one window.

    Parameters
    ----------
    data
        Price frame with a ``close`` column and a datetime index.
    ds
        ``(start, end)`` pair delimiting the window to trade. Rows before
        ``start`` are still used so the indicators have history to warm up on.
    params
        ``(mama_fast_limit, slope_period, tsf_period)``. The two periods may be
        floats (they come out of a NaN-capable parameter table) and are cast to
        ``int`` before being handed to TA-Lib.

    Returns
    -------
    tuple
        ``(ds[0], total_return)`` where ``total_return`` is the
        ``('TotalReturn', 'Portfolio')`` cell of the backtester's ``return_df``.

    Notes
    -----
    The bar frequency given to the backtester is the module-level ``freq``.
    """
    mama_limit = params[0]
    slope_period = int(params[1])
    tsf_period = int(params[2])

    data_ = data.loc[:ds[1],:].copy(deep=True)
    data_['ht'] = talib.HT_TRENDLINE(data_.close)

    data_["mama"], data_["fama"] = talib.MAMA(data_.ht, fastlimit=mama_limit, slowlimit=mama_limit/10)
    data_['signals_mama'] = np.where(data_["mama"] > data_["fama"], 1, -1)

    data_['signals_slope'] = np.sign(talib.LINEARREG_SLOPE(data_.ht, timeperiod=slope_period))

    data_['tsf'] = talib.TSF(data_.ht, tsf_period)
    data_['signals_tsf'] = np.where(data_.ht > data_['tsf'], 1, -1)

    data_.dropna(axis=0, inplace=True)
    # Explicit copy: a column is added below and the slice must not alias data_.
    data_ = data_.loc[ds[0]:ds[1],:].copy()

    # DataFrame.mode(axis=1) returns a DataFrame with one column per tied mode.
    # Assigning that to a single column fails as soon as any row has a tie
    # (e.g. votes 1, 0, -1 when the slope is exactly flat). Take the first mode
    # and stay flat (0) on rows without a unique majority.
    votes = data_[['signals_mama', 'signals_slope', 'signals_tsf']]
    modes = votes.mode(axis=1)
    aggregate = modes.iloc[:, 0]
    if modes.shape[1] > 1:
        tied = modes.iloc[:, 1:].notna().any(axis=1)
        aggregate = aggregate.mask(tied, 0)
    data_['signals_aggregate'] = aggregate

    backtester = IterativeBacktester(data=data_, signals=data_.signals_aggregate, freq=freq)
    backtester.backtest(progress_bar=False)

    return ds[0], backtester.return_df.loc['TotalReturn', 'Portfolio']


In [99]:
# Backtest every lagged-parameter window in parallel. p_umap is p_tqdm's
# unordered map, so `res_` is not guaranteed to come back in chronological order.
res_ = p_umap(get_aggregate_return, repeat(data_sql.copy(deep=True)), dates, parameters)

100%|██████████| 79/79 [00:01<00:00, 50.91it/s]


In [100]:
# Tabulate the (window, return) pairs: one row per window, one column named
# after the ticker, indexed by the 'dates' column.
res_df = (
    pd.DataFrame(res_)
    .set_axis(['dates', ticker], axis=1)
    .set_index('dates')
)

In [101]:
res_df

Unnamed: 0_level_0,EUR/USD
dates,Unnamed: 1_level_1
2021-02-13,-0.003229
2021-02-20,-0.000335
2021-02-27,0.007611
2021-03-06,-0.006323
2021-03-20,-0.010537
...,...
2022-07-16,0.001834
2022-07-23,-0.011016
2022-07-30,-0.019696
2022-08-06,-0.015475


In [109]:
def get_backtest_returns(ticker, freq, params_ranges):
    """Run the full walk-forward pipeline for one instrument.

    Loads the instrument's prices, finds the best MAMA / slope / TSF parameter
    for every weekly window, lags those parameters by one window, and backtests
    the majority-vote signal on each window with the lagged parameters.

    Parameters
    ----------
    ticker
        Table name of the instrument in the price database.
    freq
        Bar timeframe; selects the ``PriceData_{freq}.db`` file to read.
    params_ranges
        Mapping with ``'mama'``, ``'slope'`` and ``'tsf'`` keys, each a
        ``(start, stop, step)`` tuple.

    Returns
    -------
    DataFrame
        Indexed by ``dates`` (window start) with a single column named after
        ``ticker`` holding the window's total return, sorted by date.

    Notes
    -----
    The helpers called here (``get_*_optim``, ``get_aggregate_return``) read the
    module-level ``freq`` for the backtester, so keep it equal to the ``freq``
    argument.
    """
    # The `freq` argument is honoured; it used to be overwritten with 'H1'.
    sql_path = f'/home/kachain/python_projects/algorithmic_trading/fxcmpy_trader/PriceData_{freq}.db'
    conn = sql.connect(sql_path)
    try:
        data_sql = pd.read_sql(f"SELECT * FROM '{ticker}'", conn, parse_dates=['date'], index_col=['date'])
    finally:
        # Release the database handle even when the query fails.
        conn.close()

    # Mid prices: row-wise mean of the bid and ask quotes.
    for col in ['open', 'close', 'high', 'low']:
        data_sql[col] = data_sql[['bid'+col, 'ask'+col]].mean(axis=1)

    mama_optim = get_mama_optim(data_sql, params_ranges['mama'])
    slope_optim = get_slope_optim(data_sql, params_ranges['slope'])
    tsf_optim = get_tsf_optim(data_sql, params_ranges['tsf'])

    best_parameters = pd.DataFrame()
    for optim in (mama_optim, slope_optim, tsf_optim):
        best_parameters = best_parameters.join(optim, how='outer')

    param_list = best_parameters[['mama_param', 'slope_param', 'tsf_param']].values.tolist()
    best_parameters['params'] = param_list
    # Lag by one window so each window is traded with the previous window's optimum.
    best_parameters_shift = best_parameters.shift(1).dropna(axis=0)['params'].copy(deep=True)

    dates = best_parameters_shift.index.values
    parameters = best_parameters_shift.values

    res_ = p_umap(get_aggregate_return, repeat(data_sql.copy(deep=True)), dates, parameters, leave=False)
    # p_umap is an unordered map; sort so the returned frame is chronological
    # and identical from run to run.
    res_df = (
        pd.DataFrame(res_, columns=['dates', ticker])
        .set_index('dates')
        .sort_index()
    )

    gc.collect()

    return res_df

In [112]:
# Run the whole pipeline once per configured asset; `freq` and `params_ranges`
# are repeated for every ticker. `ret_` holds one single-column frame per asset.
ret_ = t_map(get_backtest_returns, my_assets, repeat(freq), repeat(params_ranges))

  0%|          | 0/2 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[

In [122]:
# Combine the per-asset return frames column-wise in a single pass instead of
# re-joining a growing frame once per asset. The outer join keeps every window
# that appears for at least one asset; an empty `ret_` yields an empty frame.
if len(ret_) > 0:
    backtest_results = pd.concat(ret_, axis=1, join='outer')
else:
    backtest_results = pd.DataFrame()

backtest_results.sort_index(ascending=True, inplace=True)

In [125]:
# Persist the combined per-window returns as CSV, creating the output folder
# on first use.
save_folder = 'backtest_parameters'
results_csv = os.path.join(save_folder, 'backtest_params_results.csv')
os.makedirs(save_folder, exist_ok=True)
backtest_results.to_csv(results_csv)