In [1]:
import sys 
import os  
import json
import datetime as dt 
import pandas as pd 
import numpy as np 
import talib
import sqlite3 as sql
from itertools import repeat
from p_tqdm import p_umap
sys.path.append('../')
from backtester import * 
from fxcmtoken import my_assets

In [2]:
for i, a in enumerate(my_assets):
    print(i, a)

0 EUR/USD
1 USD/JPY
2 GBP/USD
3 USD/CHF
4 AUD/USD
5 USD/CAD
6 NZD/USD
7 AUS200
8 ESP35
9 EUSTX50
10 FRA40
11 GER30
12 HKG33
13 JPN225
14 NAS100
15 SPX500
16 UK100
17 US30
18 Copper
19 NGAS
20 UKOil
21 USOil
22 XAU/USD
23 XAG/USD


In [3]:
freq = 'H1'
conn = sql.connect(f'../PriceData/PriceData_{freq}.db')
ticker = my_assets[10]
print(ticker)
data_sql = pd.read_sql(f"SELECT * FROM '{ticker}'", conn, parse_dates=['date'], index_col=['date'])
for col in ['open', 'close', 'high', 'low']:
    data_sql[col] = data_sql[['bid'+col, 'ask'+col]].mean(axis=1)
conn.close()
data_sql.tail()

FRA40


Unnamed: 0_level_0,bidopen,bidclose,bidhigh,bidlow,askopen,askclose,askhigh,asklow,tickqty,open,close,high,low
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-07-22 15:00:00,6225.68,6211.04,6230.38,6200.63,6226.68,6212.04,6231.38,6201.63,6323,6226.18,6211.54,6230.88,6201.13
2022-07-22 16:00:00,6211.29,6195.34,6214.21,6194.1,6212.29,6196.34,6215.21,6195.1,2582,6211.79,6195.84,6214.71,6194.6
2022-07-22 17:00:00,6194.84,6181.79,6194.84,6173.65,6195.84,6182.79,6195.84,6174.65,1922,6195.34,6182.29,6195.34,6174.15
2022-07-22 18:00:00,6181.42,6168.42,6182.19,6161.08,6182.42,6169.42,6183.19,6162.08,1868,6181.92,6168.92,6182.69,6161.58
2022-07-22 19:00:00,6168.38,6178.51,6182.79,6163.92,6169.38,6179.71,6183.79,6164.92,2226,6168.88,6179.11,6183.29,6164.42


In [4]:
def get_mama_return(n_period, data, freq, split_date):
    
    data['ht'] = talib.HT_TRENDLINE(data.close)
    data.dropna(axis=0, inplace=True)
    data['mama'], data['fama'] = talib.MAMA(data.ht, fastlimit=n_period, slowlimit=n_period/10)
    data.dropna(axis=0, inplace=True)
    data['signals'] = np.where(data.mama > data.fama, 1, -1) 
    
    train, test = (data.loc[pd.Timestamp(split_date)-dt.timedelta(days=7):pd.Timestamp(split_date)-dt.timedelta(days=1)] , 
                   data.loc[pd.Timestamp(split_date):])
    
    train_backtest = IterativeBacktester(data=train, signals=train.signals, freq=freq)
    train_backtest.backtest(progress_bar=False)
    
    test_backtest = IterativeBacktester(data=test, signals=test.signals, freq=freq)
    test_backtest.backtest(progress_bar=False)

    return n_period, train_backtest.return_df.loc['TotalReturn', 'Portfolio'], test_backtest.return_df.loc['TotalReturn', 'Portfolio']

In [5]:
def get_tsf_return(n_period, data, freq, split_date):
    
    data['ht'] = talib.HT_TRENDLINE(data.close)
    data['tsf'] = talib.TSF(data.ht, n_period)
    data.dropna(axis=0, inplace=True)
    data['signals'] = np.where(data.ht > data.tsf, 1, -1)
    
    train, test = (data.loc[pd.Timestamp(split_date)-dt.timedelta(days=7):pd.Timestamp(split_date)-dt.timedelta(days=1)] , 
                   data.loc[pd.Timestamp(split_date):])
    
    train_backtest = IterativeBacktester(data=train, signals=train.signals, freq=freq)
    train_backtest.backtest(progress_bar=False)
    
    test_backtest = IterativeBacktester(data=test, signals=test.signals, freq=freq)
    test_backtest.backtest(progress_bar=False)

    return n_period, train_backtest.return_df.loc['TotalReturn', 'Portfolio'], test_backtest.return_df.loc['TotalReturn', 'Portfolio']

In [6]:
def get_slope_return(n_period, data, freq, split_date):
    
    data['ht'] = talib.HT_TRENDLINE(data.close)
    data.dropna(axis=0, inplace=True)
    data['signals'] = talib.LINEARREG_SLOPE(data.ht, timeperiod=n_period).apply(np.sign)
    data.dropna(axis=0, inplace=True)
    
    train, test = (data.loc[pd.Timestamp(split_date)-dt.timedelta(days=7):pd.Timestamp(split_date)-dt.timedelta(days=1)] , 
                   data.loc[pd.Timestamp(split_date):])
    
    train_backtest = IterativeBacktester(data=train, signals=train.signals, freq=freq)
    train_backtest.backtest(progress_bar=False)
    
    test_backtest = IterativeBacktester(data=test, signals=test.signals, freq=freq)
    test_backtest.backtest(progress_bar=False)

    return n_period, train_backtest.return_df.loc['TotalReturn', 'Portfolio'], test_backtest.return_df.loc['TotalReturn', 'Portfolio']

In [7]:
def get_performance(data, split_date, params, freq):
    # params --> (mama, slope, tsf)
      
    data['ht'] = talib.HT_TRENDLINE(data.close)
    data['mama'], data['fama'] = talib.MAMA(data.ht, fastlimit=params[0], slowlimit=params[0]/10)
    data['slope'] = talib.LINEARREG_SLOPE(data.ht, timeperiod=params[1])
    data['tsf'] = talib.TSF(data.ht, params[2])
    data.dropna(axis=0, inplace=True)

    # signals
    data['mama_signals'] = np.where(data.mama > data.fama, 1, -1) 
    data['slope_signals'] = data.slope.apply(np.sign)
    data['tsf_signals'] = np.where(data.ht > data.tsf, 1, -1)
    signal_cols = ['mama_signals', 'slope_signals', 'tsf_signals']
    data['agg_signals'] = data[signal_cols].mode(axis=1)
    
    # train/test split
    train, test = (data.loc[pd.Timestamp(split_date)-dt.timedelta(days=7):pd.Timestamp(split_date)-dt.timedelta(days=1)] , 
                   data.loc[pd.Timestamp(split_date):])
    
    # backtest train
    train_date_range = train.index[-1]-train.index[0]
    train_backtest = IterativeBacktester(data=train, signals=train.agg_signals, freq=freq)
    train_backtest.backtest(progress_bar=False)
    
    train_ret = train_backtest.return_df.loc['TotalReturn', 'Portfolio']
    train_signal_counts = train_backtest.signals.value_counts()
    train_signal_changes = train_backtest.signals.diff(1).dropna().apply(np.abs).value_counts()
    
    train_total_days = train_date_range.total_seconds() / (60*60*24)
    try:
        train_pos_short = train_signal_counts[-1]
    except:
        train_pos_short = 0
    try:
        train_pos_long = train_signal_counts[1]
    except:
        train_pos_long = 0
    train_pos_changes = (train_signal_changes.index * train_signal_changes).sum()
    
    # backtest test
    test_date_range = test.index[-1]-test.index[0]
    test_backtest = IterativeBacktester(data=test, signals=test.agg_signals, freq=freq)
    test_backtest.backtest(progress_bar=False)
    
    test_ret = test_backtest.return_df.loc['TotalReturn', 'Portfolio']
    test_signal_counts = test_backtest.signals.value_counts()
    test_signal_changes = test_backtest.signals.diff(1).dropna().apply(np.abs).value_counts()
    
    test_total_days = test_date_range.total_seconds() / (60*60*24)
    try:
        test_pos_short = test_signal_counts[-1]
    except:
        test_pos_short = 0
    try:
        test_pos_long = test_signal_counts[1]
    except:
        test_pos_long = 0
    test_pos_changes = (test_signal_changes.index * test_signal_changes).sum()
    
    # (train , test)
    # returns, #days, #short, #long, #posChanges
#     performances[params] = (train_ret, train_total_days, train_pos_short, train_pos_long, train_pos_changes,
#                             test_ret, test_total_days, test_pos_short, test_pos_long, test_pos_changes)
    return (params, 
            train_ret, train_total_days, train_pos_short, train_pos_long, train_pos_changes,
            test_ret, test_total_days, test_pos_short, test_pos_long, test_pos_changes)

In [8]:
freq = 'H1'
conn = sql.connect(f'../PriceData/PriceData_{freq}.db')
split_date = '2022-07-17'
train_returns = []
test_returns = []
optim_parameters = {}

for ticker in tqdm_notebook(my_assets):    
    data_sql = pd.read_sql(f"SELECT * FROM '{ticker}'", conn, parse_dates=['date'], index_col=['date'])
    for col in ['open', 'close', 'high', 'low']:
        data_sql[col] = data_sql[['bid'+col, 'ask'+col]].mean(axis=1)
    
    mama_results = p_umap(get_mama_return, np.arange(0.1, 1, 0.1),
                          repeat(data_sql.copy(deep=True)), 
                          repeat(freq),
                          repeat(split_date), 
                          leave=False)
    best_mama_period = pd.DataFrame(mama_results).sort_values(by=1, ascending=False).iloc[0, 0]
    
    tsf_results = p_umap(get_tsf_return, 
                         np.arange(200, 501, 10),
                         repeat(data_sql.copy(deep=True)), 
                         repeat(freq),
                         repeat(split_date),
                         leave=False)
    best_tsf_period = pd.DataFrame(tsf_results).sort_values(by=1, ascending=False).iloc[0, 0]
    
    slope_results = p_umap(get_slope_return, 
                            np.arange(200, 501, 10),
                            repeat(data_sql.copy(deep=True)), 
                            repeat(freq),
                            repeat(split_date),
                            leave=False)
    best_slope_period = pd.DataFrame(slope_results).sort_values(by=1, ascending=False).iloc[0, 0]
    
    params = (best_mama_period, best_slope_period, best_tsf_period)
    optim_parameters[ticker] = {}
    optim_parameters[ticker]['mama'] = float(best_mama_period)
    optim_parameters[ticker]['slope'] = int(best_slope_period)
    optim_parameters[ticker]['tsf'] = int(best_tsf_period)
    
    perf = get_performance(data_sql.copy(deep=True), split_date, params, freq)
    train_ret = perf[1]
    test_ret = perf[6]
    train_returns.append(train_ret)
    test_returns.append(test_ret)
    
    lines = [f"{ticker} | {freq}", 
             f'{params}\nTrainReturn: {train_ret*100:.2f} %\nTestReturn: {test_ret*100:.2f} %',
             '-'*50]
    for l in lines: print(l)
        
    with open(f'optimize_train_test_split{split_date}.txt', 'a') as f:
        f.writelines('\n'.join(lines))
        f.write('\n')
    
conn.close()

lines = ["*** Average returns ***", 
         f"Train {np.mean(train_returns)*100:.2f} %",
         f"Test {np.mean(test_returns)*100:.2f} %"]

for l in lines: print(l)

with open(f'optimize_train_test_split{split_date}.txt', 'a') as f:
        f.writelines('\n'.join(lines))

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

EUR/USD | H1
(0.1, 200, 380)
TrainReturn: 0.86 %
TestReturn: -0.22 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

USD/JPY | H1
(0.1, 350, 350)
TrainReturn: 1.86 %
TestReturn: -0.63 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

GBP/USD | H1
(0.1, 200, 460)
TrainReturn: 1.37 %
TestReturn: -0.09 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

USD/CHF | H1
(0.1, 480, 240)
TrainReturn: 1.31 %
TestReturn: 1.52 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

AUD/USD | H1
(0.9, 210, 370)
TrainReturn: 1.51 %
TestReturn: 1.84 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

USD/CAD | H1
(0.1, 410, 500)
TrainReturn: 0.46 %
TestReturn: 0.29 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

NZD/USD | H1
(0.1, 200, 500)
TrainReturn: 0.37 %
TestReturn: 1.35 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

AUS200 | H1
(0.1, 240, 360)
TrainReturn: -0.09 %
TestReturn: 1.00 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

ESP35 | H1
(0.5, 200, 250)
TrainReturn: 2.35 %
TestReturn: -0.95 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

EUSTX50 | H1
(0.4, 200, 500)
TrainReturn: 2.27 %
TestReturn: 1.36 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

FRA40 | H1
(0.30000000000000004, 320, 360)
TrainReturn: 2.97 %
TestReturn: 1.46 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

GER30 | H1
(0.2, 210, 500)
TrainReturn: 5.15 %
TestReturn: -0.31 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

HKG33 | H1
(0.1, 200, 210)
TrainReturn: 4.10 %
TestReturn: 0.05 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

JPN225 | H1
(0.7000000000000001, 480, 250)
TrainReturn: 3.55 %
TestReturn: 2.27 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

NAS100 | H1
(0.6, 230, 200)
TrainReturn: -0.08 %
TestReturn: 3.11 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

SPX500 | H1
(0.6, 250, 360)
TrainReturn: 3.06 %
TestReturn: -2.87 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

UK100 | H1
(0.9, 480, 460)
TrainReturn: 0.75 %
TestReturn: 1.02 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

US30 | H1
(0.9, 500, 370)
TrainReturn: 3.81 %
TestReturn: 1.92 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Copper | H1
(0.1, 200, 310)
TrainReturn: 6.46 %
TestReturn: -2.78 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

NGAS | H1
(0.1, 210, 330)
TrainReturn: 11.28 %
TestReturn: 14.67 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

UKOil | H1
(0.9, 200, 320)
TrainReturn: 6.61 %
TestReturn: 2.25 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

USOil | H1
(0.9, 200, 310)
TrainReturn: 6.62 %
TestReturn: 6.01 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

XAU/USD | H1
(0.1, 210, 500)
TrainReturn: 2.17 %
TestReturn: -0.92 %
--------------------------------------------------


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

XAG/USD | H1
(0.1, 200, 440)
TrainReturn: 3.28 %
TestReturn: 0.59 %
--------------------------------------------------
*** Average returns ***
Train 3.00 %
Test 1.33 %


In [9]:
with open('my_parameters.json', 'w') as f:
    json.dump(optim_parameters, f, indent=4)

In [10]:
with open('my_parameters.json', 'r') as f:
    my_params = json.load(f)

In [11]:
my_params

{'EUR/USD': {'mama': 0.1, 'slope': 200, 'tsf': 380},
 'USD/JPY': {'mama': 0.1, 'slope': 350, 'tsf': 350},
 'GBP/USD': {'mama': 0.1, 'slope': 200, 'tsf': 460},
 'USD/CHF': {'mama': 0.1, 'slope': 480, 'tsf': 240},
 'AUD/USD': {'mama': 0.9, 'slope': 210, 'tsf': 370},
 'USD/CAD': {'mama': 0.1, 'slope': 410, 'tsf': 500},
 'NZD/USD': {'mama': 0.1, 'slope': 200, 'tsf': 500},
 'AUS200': {'mama': 0.1, 'slope': 240, 'tsf': 360},
 'ESP35': {'mama': 0.5, 'slope': 200, 'tsf': 250},
 'EUSTX50': {'mama': 0.4, 'slope': 200, 'tsf': 500},
 'FRA40': {'mama': 0.30000000000000004, 'slope': 320, 'tsf': 360},
 'GER30': {'mama': 0.2, 'slope': 210, 'tsf': 500},
 'HKG33': {'mama': 0.1, 'slope': 200, 'tsf': 210},
 'JPN225': {'mama': 0.7000000000000001, 'slope': 480, 'tsf': 250},
 'NAS100': {'mama': 0.6, 'slope': 230, 'tsf': 200},
 'SPX500': {'mama': 0.6, 'slope': 250, 'tsf': 360},
 'UK100': {'mama': 0.9, 'slope': 480, 'tsf': 460},
 'US30': {'mama': 0.9, 'slope': 500, 'tsf': 370},
 'Copper': {'mama': 0.1, 'slope'