In [1]:
import sys 
sys.path.append('../')
from itertools import product, repeat
import pandas as pd 
import numpy as np 
from scipy.stats import mode
import talib
import sqlite3 as sql
import matplotlib.pyplot as plt
from p_tqdm import p_map, p_umap, t_map
from backtester import * 
from fxcmtoken import my_assets

In [2]:
# Show each asset next to its position in `my_assets` (used below to pick a ticker).
for position, asset_name in enumerate(my_assets):
    print(position, asset_name)

0 EUR/USD
1 USD/JPY
2 GBP/USD
3 USD/CHF
4 AUD/USD
5 USD/CAD
6 NZD/USD
7 Bund
8 AUS200
9 ESP35
10 EUSTX50
11 FRA40
12 GER30
13 HKG33
14 JPN225
15 NAS100
16 SPX500
17 UK100
18 US30
19 Copper
20 NGAS
21 UKOil
22 USOil
23 XAU/USD
24 XAG/USD


In [3]:
# Load the price history of one asset from the local SQLite store.
freq = 'H1'
sql_path = f'/home/kachain/python_projects/algorithmic_trading/PriceData/PriceData_{freq}.db'
ticker = my_assets[0]

# sqlite3 connections are not released automatically, so close the handle as
# soon as the table has been read into memory (even if the read fails).
conn = sql.connect(sql_path)
try:
    data_sql = pd.read_sql(f"SELECT * FROM '{ticker}'", conn, parse_dates=['date'], index_col=['date'])
finally:
    conn.close()

# Mid prices: row-wise average of the bid and ask value of each OHLC field.
for col in ['open', 'close', 'high', 'low']:
    data_sql[col] = data_sql[['bid'+col, 'ask'+col]].mean(axis=1)

print(ticker)
print(data_sql.shape)
data_sql.tail()

EUR/USD
(10000, 13)


Unnamed: 0_level_0,bidopen,bidclose,bidhigh,bidlow,askopen,askclose,askhigh,asklow,tickqty,open,close,high,low
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-08-26 16:00:00,0.99963,0.99792,1.00023,0.99747,0.99976,0.99805,1.00036,0.9976,18846,0.999695,0.997985,1.000295,0.997535
2022-08-26 17:00:00,0.99792,0.9969,0.99824,0.99579,0.99806,0.99704,0.99837,0.99594,12934,0.99799,0.99697,0.998305,0.995865
2022-08-26 18:00:00,0.99691,0.99636,0.99727,0.9958,0.99704,0.99648,0.99741,0.99594,11144,0.996975,0.99642,0.99734,0.99587
2022-08-26 19:00:00,0.99635,0.99649,0.99666,0.99558,0.99648,0.99663,0.99682,0.99572,8444,0.996415,0.99656,0.99674,0.99565
2022-08-26 20:00:00,0.99649,0.99634,0.99687,0.99589,0.99662,0.99665,0.99701,0.99637,3720,0.996555,0.996495,0.99694,0.99613


In [4]:
# Labels of the weekly bins (weeks anchored on Saturday) covering the price
# history; consecutive labels are paired further down as window boundaries.
sat_index = data_sql.resample('W-Sat').last().index

In [5]:
# Candidate (fast, slow) pairs: every combination of
#   fast in 0.1 .. 0.9   (step 0.1)
#   slow in 0.01 .. 0.10 (step 0.01)
# ordered with the fast value varying slowest.
fast_grid = np.arange(0.1, 1, 0.1)
slow_grid = np.arange(0.01, 0.11, 0.01)
params = list(product(fast_grid, slow_grid))
len(params)

90

In [6]:
def get_mama_signals(data, param, start, end):
    """Add MAMA/FAMA crossover signals to a copy of the price data.

    The indicator is computed on the whole of ``data`` and only afterwards is
    the result cut down to ``start``..``end``.

    Parameters
    ----------
    data : DataFrame with a ``close`` column; it is deep-copied, not modified.
    param : indexable pair; ``param[0]`` is passed to ``talib.MAMA`` as
        ``fastlimit`` and ``param[1]`` as ``slowlimit``.
    start, end : labels used for the final ``.loc[start:end]`` slice.

    Returns
    -------
    DataFrame slice with extra ``mama``, ``fama`` and ``signal`` columns.
    ``signal`` is 1 where mama > fama, -1 where mama < fama; where the two
    are equal the previous signal is carried forward (NaN if there is none).
    """
    frame = data.copy(deep=True)
    mama, fama = talib.MAMA(frame.close, fastlimit=param[0], slowlimit=param[1])
    frame["mama"] = mama
    frame["fama"] = fama
    # Drop every row that contains a NaN in any column.
    frame.dropna(axis=0, inplace=True)

    mama_above = frame["mama"] > frame["fama"]
    mama_below = frame["mama"] < frame["fama"]
    frame['signal'] = np.select([mama_above, mama_below], [1, -1], default=np.nan)
    frame['signal'] = frame['signal'].ffill()
    return frame.loc[start:end]

In [7]:
def get_strategy_return(data, param, freq, start, end):
    """Backtest the MAMA signals for one parameter pair over one window.

    Builds the signals with ``get_mama_signals`` and runs them through
    ``IterativeBacktester`` with its progress bar turned off.

    Returns
    -------
    tuple ``(param, total_return)`` where ``total_return`` is the
    ``'TotalReturn'`` / ``'Portfolio'`` entry of the backtester's ``return_df``.
    """
    signal_frame = get_mama_signals(data, param, start, end)
    bt = IterativeBacktester(data=signal_frame, signals=signal_frame.signal, freq=freq)
    bt.backtest(progress_bar=False)
    total_return = bt.return_df.loc['TotalReturn', 'Portfolio']
    return param, total_return

def get_optim_param(data, params, freq, start, end):
    """Find the best-returning parameter pair for a single window.

    Every candidate in ``params`` is backtested with ``get_strategy_return``
    over ``start``..``end`` and the candidates are ranked by return,
    highest first.

    Returns
    -------
    tuple ``(start, end, best_params, best_return)``.
    """
    scored = t_map(get_strategy_return, repeat(data), params, repeat(freq),
                   repeat(start), repeat(end), leave=False)
    ranking = pd.DataFrame(scored, columns=['params', 'returns'])
    ranking = ranking.sort_values(by='returns', ascending=False)
    winner = ranking.iloc[0]

    return start, end, winner['params'], winner['returns']

In [8]:
# Pair consecutive weekly labels into (start, end) windows; the first label
# is never used as a window start.
start_dates = list(sat_index[1:-1])
end_dates = list(sat_index[2:])

In [None]:
# Optimise every weekly window in parallel. p_map (unlike p_umap) returns the
# results in the same order as start_dates/end_dates; the rolling statistics
# and the start_dates[-len(params):] alignment further down rely on the rows
# being in chronological order.
optim_params = p_map(get_optim_param, repeat(data_sql), repeat(params), repeat(freq), start_dates, end_dates)

  0%|          | 0/80 [00:00<?, ?it/s]

In [None]:
# One row per weekly window. Sort chronologically regardless of the order in
# which the parallel map handed the results back: the rolling statistics
# below assume consecutive rows are consecutive weeks.
opt_df = (
    pd.DataFrame(optim_params, columns=['starts', 'ends', 'params', 'returns'])
    .sort_values('starts')
    .reset_index(drop=True)
)

In [None]:
# Per-window optimisation results: window bounds, best parameter pair and its return.
opt_df

In [None]:
# Best return found for each weekly window.
opt_df['returns'].plot.bar(figsize=(15, 5))
plt.tight_layout()
plt.show()

In [None]:
# Share of windows whose best return was positive (1), negative (-1) or zero (0).
np.sign(opt_df['returns']).value_counts(normalize=True)

In [None]:
# Average of the best per-window returns.
opt_df['returns'].mean()

In [None]:
# Split each winning (fast, slow) pair into its own column.
opt_df['fast'] = [pair[0] for pair in opt_df['params']]
opt_df['slow'] = [pair[1] for pair in opt_df['params']]

In [None]:
# Winning fast value for each weekly window.
opt_df['fast'].plot.bar(figsize=(15, 5))
plt.tight_layout()
plt.show()

In [None]:
# The five most frequent winning fast values, as fractions of all windows.
opt_df['fast'].value_counts(normalize=True).head(5)

In [None]:
# Central tendency of the winning fast values (labels padded to 10 characters).
print(f"{'median':<10}", opt_df['fast'].median())
print(f"{'mean':<10}", opt_df['fast'].mean())
print(f"{'mode':<10}", opt_df['fast'].mode()[0])

In [None]:
# Winning slow value for each weekly window.
opt_df['slow'].plot.bar(figsize=(15, 5))
plt.tight_layout()
plt.show()

In [None]:
# The five most frequent winning slow values, as fractions of all windows.
opt_df['slow'].value_counts(normalize=True).head(5)

In [None]:
# Central tendency of the winning slow values (labels padded to 10 characters).
print(f"{'median':<10}", opt_df['slow'].median())
print(f"{'mean':<10}", opt_df['slow'].mean())
print(f"{'mode':<10}", opt_df['slow'].mode()[0])

In [None]:
# Number of consecutive weekly optima combined in each rolling statistic below.
n_roll = 5

In [None]:
fig, ax = plt.subplots(figsize=(15,5))
# exclude last one to be out-of-sample
fast_window = opt_df.fast.iloc[:-1].rolling(n_roll)
fast_roll_median = fast_window.median()
fast_roll_mean = fast_window.mean()
# Series.mode() always returns a Series sorted ascending, so .iloc[0] is a
# plain scalar (the smallest mode on ties, as scipy.stats.mode also picks).
# scipy.stats.mode(x)[0] is a length-1 array on older SciPy releases and a
# scalar on newer ones, which is fragile inside rolling.apply.
fast_roll_mode = fast_window.apply(lambda window: window.mode().iloc[0], raw=False)

fast_roll_median.plot(ax=ax)
fast_roll_mean.plot(ax=ax)
fast_roll_mode.plot(ax=ax)

ax.legend(['median', 'mean', 'mode'])
ax.set_title(f'n rolling: {n_roll}')
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(15,5))
# exclude last one to be out-of-sample
slow_window = opt_df.slow.iloc[:-1].rolling(n_roll)
slow_roll_median = slow_window.median()
slow_roll_mean = slow_window.mean()
# Series.mode() always returns a Series sorted ascending, so .iloc[0] is a
# plain scalar (the smallest mode on ties, as scipy.stats.mode also picks).
# scipy.stats.mode(x)[0] is a length-1 array on older SciPy releases and a
# scalar on newer ones, which is fragile inside rolling.apply.
slow_roll_mode = slow_window.apply(lambda window: window.mode().iloc[0], raw=False)

slow_roll_median.plot(ax=ax)
slow_roll_mean.plot(ax=ax)
slow_roll_mode.plot(ax=ax)

ax.legend(['median', 'mean', 'mode'])
ax.set_title(f'n rolling: {n_roll}')
plt.tight_layout()
plt.show()

## median

In [None]:
# Pair up the rolling-median fast/slow values (warm-up NaNs removed).
# Note: this rebinds `params`, replacing the optimisation grid defined earlier.
fast_p = fast_roll_median.dropna().to_numpy()
slow_p = slow_roll_median.dropna().to_numpy()

params = list(zip(fast_p, slow_p))

In [None]:
# Backtest each rolling-median pair on one of the last len(params) windows,
# matched in order.
n_test = len(params)
test_returns = p_map(get_strategy_return, repeat(data_sql), params, repeat(freq),
                     start_dates[-n_test:], end_dates[-n_test:])

In [None]:
# Tabulate the (param, return) tuples; rebinds `test_returns` from the raw result list.
test_returns = pd.DataFrame(test_returns, columns=['params', 'returns'])

In [None]:
# Return of each test window using the rolling-median parameters.
test_returns['returns'].plot.bar(figsize=(15, 5))
plt.tight_layout()
plt.show()

In [None]:
# Share of test windows with positive (1), negative (-1) or zero (0) return.
np.sign(test_returns['returns']).value_counts(normalize=True)

In [None]:
# Mean test-window return scaled by 100 (a percentage if TotalReturn is a fraction - TODO confirm).
test_returns['returns'].mean() * 100

## mean

In [None]:
# Pair up the rolling-mean fast/slow values (warm-up NaNs removed).
# Note: this rebinds `params` again.
fast_p = fast_roll_mean.dropna().to_numpy()
slow_p = slow_roll_mean.dropna().to_numpy()

params = list(zip(fast_p, slow_p))

In [None]:
# Backtest each rolling-mean pair on one of the last len(params) windows,
# matched in order.
n_test = len(params)
test_returns = p_map(get_strategy_return, repeat(data_sql), params, repeat(freq),
                     start_dates[-n_test:], end_dates[-n_test:])

In [None]:
# Tabulate the (param, return) tuples; rebinds `test_returns` from the raw result list.
test_returns = pd.DataFrame(test_returns, columns=['params', 'returns'])

In [None]:
# Return of each test window using the rolling-mean parameters.
test_returns['returns'].plot.bar(figsize=(15, 5))
plt.tight_layout()
plt.show()

In [None]:
# Share of test windows with positive (1), negative (-1) or zero (0) return.
np.sign(test_returns['returns']).value_counts(normalize=True)

In [None]:
# Mean test-window return scaled by 100 (a percentage if TotalReturn is a fraction - TODO confirm).
test_returns['returns'].mean() * 100

## mode

In [None]:
# Pair up the rolling-mode fast/slow values (warm-up NaNs removed).
# Note: this rebinds `params` again.
fast_p = fast_roll_mode.dropna().to_numpy()
slow_p = slow_roll_mode.dropna().to_numpy()

params = list(zip(fast_p, slow_p))

In [None]:
# Backtest each rolling-mode pair on one of the last len(params) windows,
# matched in order.
n_test = len(params)
test_returns = p_map(get_strategy_return, repeat(data_sql), params, repeat(freq),
                     start_dates[-n_test:], end_dates[-n_test:])

In [None]:
# Tabulate the (param, return) tuples; rebinds `test_returns` from the raw result list.
test_returns = pd.DataFrame(test_returns, columns=['params', 'returns'])

In [None]:
# Return of each test window using the rolling-mode parameters.
test_returns['returns'].plot.bar(figsize=(15, 5))
plt.tight_layout()
plt.show()

In [None]:
# Share of test windows with positive (1), negative (-1) or zero (0) return.
np.sign(test_returns['returns']).value_counts(normalize=True)

In [None]:
# Mean test-window return scaled by 100 (a percentage if TotalReturn is a fraction - TODO confirm).
test_returns['returns'].mean() * 100