In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime
from tqdm import tqdm
import json
import time 
import ccxt

from config import TradingType
from logger.logger_file import logger
from data.data_management import Data_Manager
from strategy.indicators_management import Indicators_Manager
from strategy.strategy_management import Strategy_Manager
from exchange.order_management import Order_Manager
from performance.metrics import calc_position_metrics

# Notebook-wide configuration. Three sections are read further down:
#   'general'  - trading type and location of the klines database
#   'strategy' - strategy path, allocation, timeframes and indicator parameters
#   'BACKTEST' - pairs, date window, fee/slippage and output settings
config = {'general': {'trading_type': 'BACKTEST',
                      'klines_db_location': './database/binance_kline.db'},
          'strategy': {'strategy': ['strategy.TF_tide_0:TF_tide_0'],
                       'allocation_total_notional': 100_000,
                       'allocation_per_pair_pct': 10,
                       'timeframes': {"15m": "15m", "1h":"1h","4h":"4h"},
                       'indicators': {'atr': {'length': [14]},
                                      'stdev': {'length':[14]},
                                      'rsi': {'length': [14], 'close': ['close']},
                                      'ema': {'length': [8, 13, 21, 34, 55, 89], 'close': ['close']}
                                      },
                       'initial_position_pct': 1,
                       'trailing_engaged_upnl_pct': 6,  # not required - default will be 100 to prevent trigger
                       'trailing_take_profit_upnl_pct': 4,
                       'martingale_factor': 2,
                       'stoploss': 10,
                       'shorts_enabled': False},
          'BACKTEST': {'comment': 'test',
                       'pairs': ['BTCUSDT', 'ETHUSDT', 'XTZUSDT', 'XRPUSDT'],
                       # , 'ATOMUSDT','ZECUSDT', 'ZILUSDT','ZRXUSDT', 'DOGEUSDT', 'XMRUSDT'],
                       'workers': 4,
                       'vectorised_indicators': True,
                       'window': ['2019-01-01', '2022-06-15'],
                       'klines_type': 'spot',
                       # NOTE(review): the setup cell reads this flag but passes
                       # update_db=False to Data_Manager - confirm which is intended.
                       'update_db': True,
                       'slippage': 0.001,  # slippage.fixed(0.001) or slippage.dynamic() : SLIPPAGE
                       'fee': 0.0004,
                       'output_path': "./backtests/",
                       'publish': True,
                       'dash_port': 8000}
          }

# Pair studied in this notebook and the (single) configured strategy path.
pair = "BTCUSDT"
strategy = config['strategy']['strategy'][0]

# One instruction dict per configured backtest pair; all of them share the
# same strategy and the same config object.
strategy_instructions = [
    {'pair': symbol, 'strategy': strategy, 'config': config}
    for symbol in config["BACKTEST"]["pairs"]
]

# Keep only the instruction that belongs to `pair`.
strategy_instruction = [
    instruction
    for instruction in strategy_instructions
    if instruction["pair"] == pair
][0]

logger is initialized...


In [None]:
#%%

"""
Pair-wise backtest setup.

Builds the order, strategy, indicators and data managers for the selected
pair, then loads its klines and (optionally) pre-computes the indicators.
"""

# strategy_instruction = strategy_instructions[-1]
# UNPACK backtest instructions
config = strategy_instruction["config"]
window = config['BACKTEST']['window']
trading_type = TradingType(config["general"]["trading_type"])
# =============================================================================
# Instantiate Trade Objects
# =============================================================================

# -----------------------------------
# Order manager
# -----------------------------------
#TODO: for tradair implementation have to remove dependence on CCXT -> wire in callback from tradair client (fix engine)
pair = strategy_instruction["pair"]
fee = config['BACKTEST']["fee"]
slippage = config['BACKTEST']["slippage"]

Order_manager = Order_Manager(trading_type=trading_type,
                              slippage=slippage,
                              fee=fee)

# -----------------------------------
# Strategy Manager
# -----------------------------------
#TODO: rename max_allocation to per_pair_allocation
strategy = strategy_instruction['strategy']

# Feed [strategy] section into strategy_manager, with the list of strategies
# replaced by the single strategy selected for this pair.
config_strategy = {key:value for key,value in config["strategy"].items() if key != "strategy"}
config_strategy["strategy"] = strategy

Strategy_manager = Strategy_Manager(strategy=strategy,
                                    config_strategy = config_strategy,
                                    Order_manager = Order_manager,
                                    verbose = False)


# -----------------------------------
# Data and indicators manager
# -----------------------------------
# Both keys are optional. `.get` returns None when a key is absent, which is
# what the previous bare `except:` produced, without hiding unrelated errors.
timeframes = config['strategy'].get('timeframes')
indicators = config['strategy'].get('indicators')

# Start of the backtest window, passed to Data_Manager as indicators_needed_by_datetime.
indicators_needed_by_datetime = datetime.strptime(window[0], "%Y-%m-%d")
klines_db_location = config["general"]["klines_db_location"]
update_db = config["BACKTEST"]["update_db"]
klines_type = config["BACKTEST"]["klines_type"]
# Initialise indicators manager
Indicators_manager = Indicators_Manager(indicators = indicators,
                                        postprocess_klines=Strategy_manager.strategy.postprocess_klines,
                                        preprocess_klines = Strategy_manager.strategy.preprocess_klines)
Strategy_manager.strategy.Indicators_manager = Indicators_manager

# Initialise data manager
# NOTE(review): `update_db` is read from the config above but False is passed
# here, so the configured value is ignored. Kept as-is; confirm it is deliberate.
Data_manager = Data_Manager(timeframes=timeframes,
                            indicators=indicators,
                            indicators_needed_by_datetime=indicators_needed_by_datetime,
                            klines_db_location=klines_db_location,
                            update_db=False,
                            klines_type=klines_type,
                            Indicators_manager=Indicators_manager)




# =============================================================================
# Load DATA beforehand
# =============================================================================

# -----------------------------------
# load klines
# -----------------------------------
t0 = time.time()
logger.info("Loading klines .... ")
Data_manager.load_ohlcv(pair=pair)
logger.info(f"time taken to load klines {time.time() - t0}")
# klines_dict = Data_manager.klines_dict


# -----------------------------------
# calculate indicators 
# -----------------------------------
if config["BACKTEST"]["vectorised_indicators"]:
    Data_manager.calc_all_indicators()

In [None]:
# Grab the per-timeframe klines held by the data manager and list the keys
# (used as timeframe labels by the cells below).
klines_dict = Data_manager.klines_dict
klines_dict.keys()

In [None]:
# Display the configured backtest output directory.
config["BACKTEST"]["output_path"]

In [None]:
# Display the "1m" klines frame (Jupyter truncates the rendering of large frames).
klines_dict["1m"]

# CatBoostClassifier

## CatBoost across multiple timeframes

In [None]:
from catboost import CatBoostClassifier, Pool,CatBoostRegressor

In [None]:
# Independent working copies of each timeframe's klines frame.
df_1m, df_15m, df_1h, df_4h = (
    klines_dict[timeframe].copy() for timeframe in ("1m", "15m", "1h", "4h")
)

In [None]:
# Display the columns available on the 1m frame.
df_1m.columns

### a) Concatenate all timeframe DataFrames

In [None]:
import pandas as pd
from functools import reduce
from tqdm import tqdm

# Columns taken from every timeframe frame; each is suffixed with its
# timeframe label (e.g. "close_1m") before the frames are merged.
features = ['open', 'high', 'low', 'close', 'volume','quote_vol', 'nTrades','takerBuy_quoteAssetVol', 'takerBuy_baseAssetVol',
            'ATRr_14', 'RSI_14', 'EMA_67',
            'tide', 'ebb', 'flow']

temp = []
# The loop variable is deliberately not called `df`, so it cannot be confused
# with the merged result frame built below.
for timeframe, klines in klines_dict.items():
    temp.append(
        klines[features]
        .dropna()                      # drop rows with any missing feature
        .astype({"tide": int})
        .add_suffix(f"_{timeframe}")
    )

# Outer-join all timeframes on the index, keep only rows that exist on the 1m
# grid, then carry the last known higher-timeframe values forward.
# NOTE(review): if klines are indexed by candle OPEN time, forward-filling a
# higher-timeframe candle onto 1m rows exposes its close before it is known
# (look-ahead) - confirm the index convention.
df = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), temp)
df = (
    df.dropna(subset=["tide_1m"])
      .ffill()                         # replaces the deprecated fillna(method="ffill")
      .dropna()                        # leading rows before every timeframe has a value
)

In [None]:
import json
from urllib.request import urlopen
import pandas as pd

# Mempool-size chart from blockchain.info (3-year span, unsampled).
url = 'https://api.blockchain.info/charts/mempool-size?format=json&timespan=3year&sampled=false'
# Close the HTTP response deterministically and bound the wait, so a stalled
# request cannot hang the kernel indefinitely.
with urlopen(url, timeout=60) as response:
    data = json.loads(response.read())

In [None]:
# Mempool size indexed by timestamp rounded to the minute, so it can be joined
# onto the 1m klines index.
df1 = (
    pd.DataFrame(data["values"])
    # `.dt.round` is the datetime rounding API; plain `Series.round` is for
    # decimal places. "1min" replaces the deprecated "1T" alias.
    .assign(x=lambda frame: pd.to_datetime(frame["x"], unit="s").dt.round("1min"))
    .set_index("x")
    .rename(columns={"y": "mempoolsize"})
)
# Two samples can round to the same minute; keep the latest one so a left
# merge on this index cannot duplicate kline rows.
df1 = df1[~df1.index.duplicated(keep="last")]

In [None]:
# Attach mempool size to the feature frame and carry the last observation
# forward (`.ffill()` replaces the deprecated `fillna(method="ffill")`).
df0 = pd.merge(df, df1, right_index=True, left_index=True, how="left").ffill()

In [None]:
import pickle
# Persist the merged multi-timeframe frame to disk.
# NOTE(review): this dumps `df`, not `df0` (the frame with `mempoolsize`
# built in the previous cell) - confirm which one is meant to be saved.
with open(f'./btcusdt_preprocessed.pickle', 'wb') as handle:
    pickle.dump(df, handle)

### b) Label generation using triple barrier

In [None]:
def get_vol(prices, span=100, delta=pd.Timedelta(hours=1)):
    """Estimate volatility as the exponentially weighted std of simple returns.

    prices: price series.
    span:   span of the exponentially weighted window.
    delta:  accepted for interface compatibility; the current implementation
            uses bar-to-bar returns and does not read it.

    Returns a series aligned with `prices`.
    """
    # 1. simple returns p[t] / p[t-1] - 1
    simple_returns = prices.pct_change()
    # 2. exponentially weighted standard deviation of those returns
    return simple_returns.ewm(span=span).std()

In [None]:
def get_horizons(prices, delta=pd.Timedelta(minutes=15)):
    """Map each timestamp to the first index timestamp at or after `t + delta`.

    Timestamps whose horizon falls beyond the last row are left out, so the
    returned series (named "t1") covers only the leading part of the index.
    """
    index = prices.index
    positions = index.searchsorted(index + delta)
    # discard horizons that would point past the end of the data
    reachable = positions[positions < prices.shape[0]]
    horizon_stamps = index[reachable]
    horizons = pd.Series(horizon_stamps, index=index[:horizon_stamps.shape[0]])
    horizons.name = "t1"
    return horizons

In [None]:
def get_touches(prices, events, factors=(1, 1)):
    '''
    Find the first time each event's price path crosses its profit / loss barrier.

    prices: price series indexed by timestamp
    events: pd dataframe indexed by event start time, with columns
        t1: timestamp of the next horizon (end of the path)
        threshold: unit height of top and bottom barriers
        side: the side of each bet
    factors: (upper, lower) multipliers of the threshold to set the height of
           top/bottom barriers; a value <= 0 disables that barrier

    Returns a copy of events[['t1']] with two added columns, 'stop_loss' and
    'take_profit', holding the earliest touch time (missing if never touched).
    '''
    out = events[['t1']].copy(deep=True)
    if factors[0] > 0:
        thresh_uppr = factors[0] * events['threshold']
    else:
        # no uppr thresh: comparisons against NaN are always False
        thresh_uppr = pd.Series(index=events.index, dtype=float)
    if factors[1] > 0:
        thresh_lwr = -factors[1] * events['threshold']
    else:
        # no lwr thresh
        thresh_lwr = pd.Series(index=events.index, dtype=float)

    side = events['side']
    stop_loss, take_profit = [], []
    # `Series.items()` replaces `iteritems()`, which was removed in pandas 2.0.
    for loc, t1 in tqdm(events['t1'].items(), total=len(events)):
        path = prices.loc[loc:t1]                              # path prices
        path = (path / prices.loc[loc] - 1) * side.loc[loc]    # path returns
        stop_loss.append(path[path < thresh_lwr.loc[loc]].index.min())     # earliest stop loss
        take_profit.append(path[path > thresh_uppr.loc[loc]].index.min())  # earliest take profit

    # Assign each column once instead of growing `out` cell by cell.
    out['stop_loss'] = stop_loss
    out['take_profit'] = take_profit
    return out

In [None]:
def get_labels(touches):
    """Turn barrier touch times into labels.

    touches: dataframe with 'stop_loss' and 'take_profit' columns holding the
        earliest touch time of each barrier (missing if never touched).

    Returns a copy of `touches` with a 'label' column:
        0  - neither barrier was touched
       -1  - the stop loss was touched first (ties count as stop loss)
        1  - the take profit was touched first
    """
    out = touches.copy(deep=True)
    # pandas df.min() ignores missing values, so this is the first barrier hit
    first_touch = touches[['stop_loss', 'take_profit']].min(axis=1)

    # Vectorised equivalent of the former row loop, which relied on
    # `Series.iteritems()` (removed in pandas 2.0). The order matters:
    # default to take profit, override with stop loss, then with "no touch".
    out['label'] = 1.0
    out.loc[first_touch == touches['stop_loss'], 'label'] = -1.0
    out.loc[first_touch.isna(), 'label'] = 0.0
    return out

In [None]:
# Triple-barrier labelling on the 1m close:
#   threshold = EW volatility of returns, t1 = horizon from get_horizons,
#   side = +1 for every event, barriers at +/- 1 x threshold.
data_ohlc = df.copy()
# rows without a volatility estimate are dropped
data_ohlc = data_ohlc.assign(threshold=get_vol(data_ohlc["close_1m"])).dropna()
t1=get_horizons(data_ohlc)
# left-join the horizons; rows too close to the end have no t1 and are dropped
data_ohlc = pd.merge(data_ohlc, t1, left_index=True, right_index=True, how="left").dropna()
# data_ohlc = data_ohlc.assign(t1=get_horizons(data_ohlc)).dropna()
events = data_ohlc[['t1', 'threshold']] 
events = events.assign(side=pd.Series(1., events.index)) # long only
touches = get_touches(data_ohlc["close_1m"], events, [1,1])
touches = get_labels(touches)
data_ohlc = data_ohlc.assign(label=touches.label)

In [None]:
# Debug scratch: replay the body of get_touches for one event, selected by
# its position `t` in `events`.
t = 467864 - 1
prices = data_ohlc["close_1m"].copy()
factors = [1, 1]
out = events[['t1']].copy(deep=True)
if factors[0] > 0:
    thresh_uppr = factors[0] * events['threshold']
else:
    thresh_uppr = pd.Series(index=events.index, dtype=float)  # no uppr thresh
if factors[1] > 0:
    thresh_lwr = -factors[1] * events['threshold']
else:
    thresh_lwr = pd.Series(index=events.index, dtype=float)  # no lwr thresh

# `t` is a position, not an index label: use positional accessors explicitly
# rather than relying on integer-key fallback in `events["t1"][t]`.
loc = events.index[t]
t1 = events["t1"].iloc[t]

In [None]:
        # Debug copy of the get_touches loop body for the single event chosen above.
        # NOTE(review): this rebinds the global `df0` (the merged feature frame
        # built earlier) to a Series of path returns; later cells that read
        # `df0.columns` / `df0.copy()` will see this Series on a top-to-bottom run.
        df0=prices[loc:t1]                              # path prices
        df0=(df0 / prices[loc] - 1) * events.side[loc]  # path returns


In [None]:
# Display the path returns computed in the previous debug cell.
df0

In [None]:
# Same path returns with missing values removed.
df0.dropna()

In [None]:
        # Debug copy of the barrier-touch lines of get_touches for the selected event:
        # record the first timestamp at which the path return crosses each barrier.
        out.loc[loc, 'stop_loss'] = df0[df0 < thresh_lwr[loc]].index.min()  # earliest stop loss
        out.loc[loc, 'take_profit'] = \
        df0[df0 > thresh_uppr[loc]].index.min() # earliest take profit

### c) train/validation/test split

In [None]:
# List the candidate feature columns.
# NOTE(review): `df0` is expected to be the merged feature frame here, but the
# triple-barrier debug cells rebind `df0` to a Series (which has no `.columns`)
# - confirm the execution order this cell relies on.
features = list(df0.columns)
print(features)

In [None]:
# Rebuild the modelling frame from its sources instead of copying `df0`: the
# triple-barrier debug cells rebind `df0` to a Series of path returns, so on a
# top-to-bottom run `df0.copy()` is no longer the merged feature frame.
# Dropping the columns this cell adds keeps it safe to re-run.
df = pd.merge(
    df.drop(columns=["mempoolsize", "Y1"], errors="ignore"),
    df1,
    right_index=True, left_index=True, how="left",
).ffill()

# Every column present before the target is added is used as a feature.
features = list(df.columns)
#             ['tide_1m', 'ebb_1m', 'flow_1m', 
#             'tide_15m','ebb_15m', 'flow_15m', 
#             'tide_1h', 'ebb_1h', 'flow_1h',
#             'tide_4h', 'ebb_4h', 'flow_4h']
# NOTE(review): `cat_features` is defined but not passed to the Pools below,
# so the tide columns are treated as numeric - confirm that is intended.
cat_features = ['tide_1m','tide_15m','tide_1h','tide_4h']
# df["Y1"] = df["close_1m"].pct_change(periods=15).shift(-15).dropna()
# df["Y"]=np.where(df["Y1"] > 0, 1, 0)
# Target: the 1m ATRr_14 value 15 rows ahead (the last 15 rows have no target).
df["Y1"] = df["ATRr_14_1m"].shift(-15)
labels = ["Y1"]

train_window = ["2020-01-01","2020-06-30"]
val_window = ["2020-07-01","2020-09-30"]
test_window = ["2020-10-01","2020-12-31"]

# Drop incomplete rows once and slice every split from the same frame.
df_complete = df.dropna()

train_data = df_complete.loc[train_window[0]:train_window[1], features]
train_labels = df_complete.loc[train_window[0]:train_window[1], labels]

val_data = df_complete.loc[val_window[0]:val_window[1], features]
val_labels = df_complete.loc[val_window[0]:val_window[1], labels]

test_data = df_complete.loc[test_window[0]:test_window[1], features]
test_labels = df_complete.loc[test_window[0]:test_window[1], labels]

print(f"train data window : {str(train_data.index[0])} - {str(train_data.index[-1])}")
print(f"train label window: {str(train_labels.index[0])} - {str(train_labels.index[-1])}")

print(f"val data window  : {str(val_data.index[0])} - {str(val_data.index[-1])}")
print(f"val label window : {str(val_labels.index[0])} - {str(val_labels.index[-1])}")

print(f"test data window  : {str(test_data.index[0])} - {str(test_data.index[-1])}")
print(f"test label window : {str(test_labels.index[0])} - {str(test_labels.index[-1])}")

# Target distribution per split (drawn as figures; the axes repr is not printed).
for split_labels in (train_labels, val_labels, test_labels):
    split_labels.hist()

train_pool = Pool(data=train_data, label = train_labels)
valid_pool = Pool(data=val_data, label = val_labels)


In [None]:

# Fit a CatBoost regressor on the training pool, using the validation pool as eval set.
# NOTE(review): task_type="GPU" is hard-coded - presumably this cell needs a
# GPU-enabled environment; confirm before running elsewhere.
# model_T = CatBoostClassifier(task_type="GPU",learning_rate=0.03, custom_loss = ['Accuracy'])
model_T = CatBoostRegressor(task_type="GPU",learning_rate=0.03)
model_T.fit(train_pool, eval_set=valid_pool,
       verbose=False,
       plot=True)

In [None]:
# Predict on the test split and keep predictions next to the true labels.
y_pred = model_T.predict(test_data)
plot_df = test_labels.assign(pred_Y=y_pred)
# plot_df.plot()

# Feature importances, most important first (single column named 0).
df_fi = pd.DataFrame(
    model_T.feature_importances_,
    index=train_data.columns,
).sort_values(by=0, ascending=False)

# Features that contribute, followed by features with zero importance.
print(df_fi.where(df_fi > 0.0).dropna())
print(df_fi.where(df_fi == 0.0).dropna())

In [None]:
import matplotlib.pyplot as plt
# `predict_proba` only exists on classifiers. With the CatBoostRegressor fitted
# above it would raise, so fall back to the distribution of predicted values.
if hasattr(model_T, "predict_proba"):
    probs = model_T.predict_proba(test_data)
    plt.hist(probs, bins=100)
else:
    plt.hist(model_T.predict(test_data), bins=100)

In [None]:
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error

# accuracy_score is a classification metric and rejects continuous targets,
# so it is only used when the fitted model is a classifier. For the regressor,
# report error metrics for the model instead.
if isinstance(model_T, CatBoostClassifier):
    print("Accuracy model: ", accuracy_score(test_labels, y_pred))
    # Baselines: how well each raw tide signal alone matches the labels.
    for tide_col in ("tide_1m", "tide_15m", "tide_1h", "tide_4h"):
        print(f"Accuracy {tide_col} : ", accuracy_score(test_labels, test_data[tide_col]))
    # print("Accuracy tide vs pred : ", accuracy_score(y_pred, test_data["tide_1h"]))
else:
    print("MAE model: ", mean_absolute_error(test_labels, y_pred))
    print("RMSE model: ", mean_squared_error(test_labels, y_pred) ** 0.5)

In [None]:
# Assemble one frame with labels, predictions and context columns for plotting.
test = test_labels.copy()
test["y_pred"] = y_pred

test_close = df["close_1m"][test_window[0]:test_window[1]]
test["prob"] = test_close  # placeholder for probs[:,1]

for tide_col in ("tide_1m", "tide_15m", "tide_1h", "tide_4h"):
    test[tide_col] = test_data[tide_col]

test["close_1m"] = test_close
test["ATRr_14_1m"] = df["ATRr_14_1m"][test_window[0]:test_window[1]]

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly_resampler import FigureWidgetResampler
# One stacked panel per series, sharing the x axis; the close price panel at
# the bottom is three times as tall as the others.
fig = FigureWidgetResampler(make_subplots(rows=8, cols=1,
                    shared_xaxes=True,
                    vertical_spacing = 0.055,
                    row_heights=[1,1,1,1,1,1,1,3],
                    # a fresh spec dict per row (no shared references)
                    specs=[[{"type": "scatter"}] for _ in range(8)],
                    subplot_titles = ("4h_tide","1h_tide","15m_tide", "1m_tide","Y","prob", "y_pred", "close")
                    ))

# Every column is drawn the same way (the former if/else branches were
# identical). `add_trace(..., row=, col=)` replaces the deprecated `append_trace`.
for row, col in enumerate(["tide_4h","tide_1h","tide_15m","tide_1m", "Y1", "prob", "y_pred", "close_1m"], start=1):
    print(col)
    ax = go.Scattergl(x=test.index, y=test[col], name=col)
    fig.add_trace(ax, row=row, col=1)

fig.update_layout(
    autosize=False,
    width=1000,
    height=1000,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
)

In [1]:
# Scratch: list multiplication repeats the spec dict inside ONE row
# (one row, two columns), not across rows.
[[{"type":"scatter"}]*2]

[[{'type': 'scatter'}, {'type': 'scatter'}]]

In [32]:
import numpy as np
# Scratch: rows left over after reserving the test block and the minimum
# training block, divided by the number of gaps between splits and floored.
len_df = 86371
test_size = 10
min_train_size = 86351
n_splits = 11
np.floor((len_df - test_size - min_train_size) / (n_splits - 1))

1.0

In [34]:
# Same quantity as the previous cell, without flooring.
shift = (len_df - test_size - min_train_size) / (n_splits - 1)
shift

1.0