In [None]:
import sys
sys.path.append("..")

import datetime as dt
from ipywidgets import interact
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as pta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import statsmodels.api as sm
import vectorbtpro as vbt

from lib import bitget_loader, utils

# Setup

In [None]:
# Instrument and sample windows: is_* bounds the in-sample (model fitting) period,
# os_* bounds the out-of-sample backtest period.
symbol = 'ADAUSDT'
is_start = dt.date(2022,1,1)
is_end = dt.date(2022,12,31)

os_start = dt.date(2023,1,1)
os_end = dt.date(2024,4,18)

# Load the whole span (in-sample start to out-of-sample end) in one frame.
df = bitget_loader.load_klines_in_date_range(symbol, is_start, os_end).tz_convert(None) # convert to tz naive so I can loc with dates with pandas
# bitget has overlaps in their data. De-duplicate on the timestamp index, not on row
# values: drop_duplicates() ignores the index, so it would drop distinct bars that
# happen to share identical values and keep same-timestamp rows whose values differ.
df = df[~df.index.duplicated(keep='first')]

In [None]:
# Aggregate the raw klines into 15-minute bars: first open, highest high,
# lowest low, last close, and summed volumes.
ohlcv_rules = {
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
    'usdtvolume': 'sum',
}
df = df.resample('15min').agg(ohlcv_rules)

# Target

In [None]:
# Direction of each bar's body: +1 when close > open, -1 when close < open,
# NaN when they are equal (or missing).
bar_body = df['close'] - df['open']
df['y_return'] = np.sign(bar_body).replace(0, np.nan)
#df['y_return'] = df['close'].diff(5)>0
# The target of a row is the direction of the NEXT bar.
df['y_return'] = df['y_return'].shift(-1)
# Drops every row containing a NaN: the last row (no next bar), rows whose next
# bar is flat, and any other row with missing values.
df = df.dropna()

# Features

## Momentum

In [None]:
# Bar-over-bar close return, z-scored with a window length of 200.
close_returns = df['close'].pct_change()
df['x_return_zs'] = pta.zscore(close_returns, length=200)

In [None]:
sma_windows = [7, 25, 99]

# Per window: the SMA itself, its bar-over-bar rate of change, and the
# relative distance of the close from it.
for window in sma_windows:
    sma = df['close'].rolling(window).mean()
    df[f'sma_{window}'] = sma
    df[f'x_sma_{window}_roc'] = sma.pct_change()
    df[f'x_sma_{window}_distance'] = (df['close'] - sma) / sma

# Relative spread between every (shorter, longer) pair of SMAs.
for fast, slow in itertools.combinations(sma_windows, 2):
    fast_sma = df[f'sma_{fast}']
    slow_sma = df[f'sma_{slow}']
    df[f'x_sma_{fast}_{slow}_distance'] = (fast_sma - slow_sma) / slow_sma

In [None]:
# pta.adx returns three columns per length; they are stored positionally as
# x_adx_*, x_dmp_*, x_dmn_*.
for length in (7, 25, 99):
    adx_cols = [f'x_adx_{length}', f'x_dmp_{length}', f'x_dmn_{length}']
    df[adx_cols] = pta.adx(df['high'], df['low'], df['close'], length=length)

## Volume

In [None]:
# Log-transform volume. Bars with zero volume are masked to NaN first:
# log(0) is -inf, and an infinite value contaminates the rolling z-scores
# computed from this column.
df['logvolume'] = np.log(df['volume'].where(df['volume'] > 0))

In [None]:
# Side-by-side histograms: raw volume (rows below the 90th percentile rank only)
# versus log volume.
fig, ax = plt.subplots(1,2, figsize=(12, 4))
raw_ax, log_ax = ax
below_p90 = df['volume'].rank(pct=True) < 0.9
df['volume'][below_p90].hist(ax=raw_ax)
df['logvolume'].hist(ax=log_ax)
raw_ax.set_title("Distribution of Volume")
log_ax.set_title("Distribution of log(Volume)")

In [None]:
# Volume features.
# z-score of log volume with window length 20, and its 20-bar rolling mean.
df['x_logvolume_zs'] = pta.zscore(df['logvolume'], 20)
df['x_logvolume_zs_ma'] = df['x_logvolume_zs'].rolling(20).mean()
# "Relative" variant: rows are grouped by time of day (df.index.time) and the z-score
# is computed within each group, i.e. against earlier bars at the same clock time.
# droplevel(0) removes the group key from the result index; sort_index restores time order.
df['x_relative_volume_zs'] = df['logvolume'].groupby(df.index.time).apply(lambda d: pta.zscore(d, 20)).droplevel(0).sort_index()
df['x_relative_volume_zs_ma'] = df['x_relative_volume_zs'].rolling(20).mean()
# 20-bar rolling correlation between raw volume and the absolute close-to-close return.
df['x_volume_corr'] = df['volume'].rolling(20).corr(df['close'].pct_change().abs())

## Volatility

In [None]:
# Volatility features built from the true range and the high-low range.
df['tr'] = pta.true_range(df['high'], df['low'], df['close'])
# A zero true range would give log(0) = -inf, which contaminates the rolling
# z-scores below; mask non-positive values to NaN before taking the log.
df['logtr'] = np.log(df['tr'].where(df['tr'] > 0))

df['x_tr_zs'] = pta.zscore(df['logtr'], 20)
df['x_tr_zs_ma'] = df['x_tr_zs'].rolling(20).mean()
# Same z-score, computed within groups of rows sharing the same time of day.
df['x_relative_tr_zs'] = df['logtr'].groupby(df.index.time).apply(lambda d: pta.zscore(d, 20)).droplevel(0).sort_index()
df['x_relative_tr_zs_ma'] = df['x_relative_tr_zs'].rolling(20).mean()
# High-low range relative to the open; flat bars (high == low) are masked for
# the same log(0) reason as above.
bar_range = (df['high']-df['low'])/df['open']
df['x_range_zs'] = pta.zscore(np.log(bar_range.where(bar_range > 0)), 200)

In [None]:
# Exponentially weighted mean of the true range (center of mass 720),
# expressed as a fraction of the close.
smoothed_tr = df['tr'].ewm(com=720).mean()
df['natr'] = smoothed_tr / df['close']

## Plot

In [None]:
# Three stacked panels sharing the x axis: candlesticks on top, then two line
# traces whose columns are picked interactively.
fig = go.FigureWidget(
    make_subplots(rows=3, cols=1, shared_xaxes=True, row_heights=[0.6, 0.2, 0.2])
)
for row, trace in enumerate([go.Candlestick(), go.Scatter(), go.Scatter()], start=1):
    fig.add_trace(trace, row=row, col=1)
fig.update_layout(
    height=600,
    margin=dict(l=20, r=20, b=20, t=20),
    xaxis=dict(rangeslider=dict(visible=False)),
)

@interact(date=np.unique(df.index.date), col=df.columns, col2=df.columns)
def update(date, col, col2):
    """Redraw the figure for one calendar day.

    date: day to display (selects df rows via df.loc[str(date)]).
    col:  df column plotted in the middle panel.
    col2: df column plotted in the bottom panel.
    """
    with fig.batch_update():
        day = df.loc[str(date)]
        candles, middle, bottom = fig.data
        candles.x = day.index
        candles.open = day['open']
        candles.high = day['high']
        candles.low = day['low']
        candles.close = day['close']
        middle.x = day.index
        middle.y = day[col]
        bottom.x = day.index
        bottom.y = day[col2]
        fig.update_layout()
fig


# Training

In [None]:
from pycaret.classification import ClassificationExperiment

In [None]:
# Training data: every row up to is_end. Features are the columns selected by
# utils.get_prefixed_cols with prefix 'x_'; the target is y_return.
in_sample = df.loc[:is_end]
x_train = in_sample[utils.get_prefixed_cols(df, 'x_')]
y_train = in_sample['y_return']

In [None]:
# Configure the classification experiment on the in-sample features/target.
exp = ClassificationExperiment()
exp.setup(
    data=x_train, target=y_train,
    # Fixed seed so model comparison and selection are reproducible across runs.
    session_id=42,
    # Chronological split: first 70% for training, last 30% held out (no shuffling).
    train_size=0.7,
    data_split_shuffle=False,
    data_split_stratify=False,
    numeric_imputation='drop',
    remove_multicollinearity=True,
    multicollinearity_threshold=0.8,
    normalize=True,
    pca=False,
    feature_selection=True,
    n_features_to_select=0.5,
    remove_outliers=False,
    # NOTE(review): plain k-fold on time-ordered data lets folds train on later bars
    # than they validate on - consider a time-series fold strategy.
    fold_strategy='kfold',
    fold=5,
    fold_shuffle=False,
    )

In [None]:
# Inspect the columns of the experiment's transformed feature matrix.
exp.X_transformed.columns

In [None]:
# Compare candidate models and keep three of them (n_select=3); later cells
# index into this list as best[0] and best[2].
best = exp.compare_models(n_select=3)

In [None]:
#best = exp.create_model('lr')

In [None]:
#exp.tune_model(best)

In [None]:
# 'threshold' plot for the first selected model.
exp.plot_model(best[0], 'threshold')

In [None]:
# 'threshold' plot for the third selected model.
exp.plot_model(best[2], 'threshold')

## Modelling Holdout Accuracy

## Backtest in Modelling Holdout

In [None]:
# Backtest frame: the rows of df that make up the experiment's hold-out set.
# Take an explicit copy so the prediction columns are added to an independent
# frame rather than to a slice of df (avoids SettingWithCopyWarning and keeps
# working under pandas copy-on-write).
bdf = df.loc[exp.test.index].copy()
# predict_model is called without data; its label/score columns are attached by index.
bdf[['prediction_label', 'prediction_score']] = exp.predict_model(best[0])[['prediction_label', 'prediction_score']]

In [None]:
# Long / short entry conditions: predicted direction with a score above 0.7.
le = bdf['prediction_label'] == 1
le &= bdf['prediction_score'] > 0.7

se = bdf['prediction_label'] == -1
se &= bdf['prediction_score'] > 0.7

# utils.crossover is a project helper; presumably it keeps only the bars where
# the condition switches on (crosses 0.5) - verify against lib/utils.
le = utils.crossover(le, 0.5)
se = utils.crossover(se, 0.5)

pf = vbt.Portfolio.from_signals(
    bdf['close'], open=bdf['open'], high=bdf['high'], low=bdf['low'],
    entries=le, short_entries=se,
    # Bars are resampled to 15 minutes above; freq must match the bar size or
    # duration-based and annualised statistics are computed on the wrong scale.
    freq='15min',
    # Time-based exit; with time_delta_format=0 the value is presumably counted
    # in rows (bars) - confirm against the vectorbtpro docs.
    td_stop=2,
    time_delta_format=0,
)

In [None]:
# Summary statistics of the hold-out portfolio simulated above.
pf.stats()

In [None]:
# Plot the portfolio value over time.
pf.value.plot()

### TP/SL Optimization, Percent-Based

In [None]:
# Grid of stop distances, 0.005 up to (but excluding) 0.2 in steps of 0.005,
# applied to both the stop-loss and the take-profit.
tpsl_mults = np.arange(0.005, 0.2, 0.005)
pf = vbt.Portfolio.from_signals(
    bdf['close'], open=bdf['open'], high=bdf['high'], low=bdf['low'],
    entries=le, short_entries=se,
    # Bars are 15-minute bars (see the resample step); freq must match them.
    freq='15min',
    td_stop=2,
    time_delta_format=0,
    # vbt.Param marks each argument as a parameter to sweep over.
    sl_stop=vbt.Param(tpsl_mults),
    tp_stop=vbt.Param(tpsl_mults),
    slippage=0.0001,
)

In [None]:
# Win rate per parameter combination, pivoted into a 2-D grid with both axes
# sorted ascending.
#stat_result = pf.trades.get_profit_factor().unstack()
win_rate_grid = pf.trades.win_rate.unstack()
stat_result = win_rate_grid.sort_index(axis=0).sort_index(axis=1)

In [None]:
# Heatmap of the statistic grid computed above (cell values not annotated).
sns.heatmap(stat_result, annot=False)

### TP/SL Optimization, ATR-Based

In [None]:
# Grid of multipliers, 0.5 up to (but excluding) 5 in steps of 0.25. Each stop
# distance is the multiplier times the per-bar 'natr' column.
tpsl_mults = np.arange(0.5, 5, 0.25)
pf = vbt.Portfolio.from_signals(
    bdf['close'], open=bdf['open'], high=bdf['high'], low=bdf['low'],
    entries=le, short_entries=se,
    # Bars are 15-minute bars (see the resample step); freq must match them.
    freq='15min',
    td_stop=2,
    time_delta_format=0,
    # One Series per multiplier; vbt.Param marks each argument as a parameter to sweep over.
    sl_stop=vbt.Param([x*bdf['natr'] for x in tpsl_mults]),
    tp_stop=vbt.Param([x*bdf['natr'] for x in tpsl_mults]),
    slippage=0.0001,
)

In [None]:
# Profit factor per parameter combination, pivoted into a 2-D grid.
stat_result = pf.trades.get_profit_factor().unstack()
# The axis labels are strings; everything after the first 7 characters is parsed
# as an integer. Presumably the labels look like '<7-char prefix><position>' because
# the swept values are Series rather than scalars - TODO confirm the label format.
stat_result.index = stat_result.index.str[7:].astype(int)
stat_result.columns = stat_result.columns.str[7:].astype(int)
# Sort numerically (string labels would sort lexicographically) ...
stat_result = stat_result.sort_index().sort_index(axis=1)
# ... then relabel both axes with the multipliers. This relies on the sorted integer
# order matching the order of tpsl_mults and on every combination being present.
stat_result.index = tpsl_mults
stat_result.columns = tpsl_mults

In [None]:
# Heatmap of the profit-factor grid, axes labelled by multiplier.
sns.heatmap(stat_result, annot=False)

In [None]:
# Hold-out backtest with slippage. The stop-loss / take-profit variants below are
# left commented out so one of them can be switched on by hand.
pf = vbt.Portfolio.from_signals(
    bdf['close'], open=bdf['open'], high=bdf['high'], low=bdf['low'],
    entries=le, short_entries=se,
    # Bars are 15-minute bars (see the resample step); freq must match them.
    freq='15min',
    td_stop=2,
    time_delta_format=0,
    #sl_stop=5*bdf['natr'],
    #tp_stop=5*bdf['natr'],
    #sl_stop=0.05,
    #tp_stop=0.1,
    slippage=0.0001,
)

In [None]:
# Summary statistics of the portfolio simulated above.
pf.stats()

In [None]:
# Plot the portfolio value over time.
pf.value.plot()

# Backtest OOS

In [None]:
# Finalize the first selected model; the result is used for all predictions below.
final_model = exp.finalize_model(best[0])

In [None]:
# Out-of-sample frame: every row from os_start onwards. Copy explicitly so the
# prediction columns are written to an independent frame, not a slice of df
# (avoids SettingWithCopyWarning and keeps working under pandas copy-on-write).
os_df = df.loc[os_start:].copy()
# Select the feature columns once, before any prediction column is added.
os_features = os_df[utils.get_prefixed_cols(os_df, 'x_')]
os_df['prediction_label'] = final_model.predict(os_features).values
# Column 1 of predict_proba is taken as the probability of the +1 label
# (assumes the classes are ordered [-1, 1] - TODO confirm).
os_df['prediction_score'] = final_model.predict_proba(os_features)[:,1]
# Convert to the probability of the predicted label: p for +1, 1 - p otherwise.
os_df['prediction_score'] = np.where(os_df['prediction_label']==1, os_df['prediction_score'], 1-os_df['prediction_score'])

In [None]:
# Long / short entry conditions: predicted direction with a score above 0.8.
le = os_df['prediction_label'] == 1
le &= os_df['prediction_score'] > 0.8
se = os_df['prediction_label'] == -1
se &= os_df['prediction_score'] > 0.8

# utils.crossover is a project helper; presumably it keeps only the bars where
# the condition switches on (crosses 0.5) - verify against lib/utils.
le = utils.crossover(le, 0.5)
se = utils.crossover(se, 0.5)

pf = vbt.Portfolio.from_signals(
    os_df['close'], open=os_df['open'], high=os_df['high'], low=os_df['low'],
    entries=le, short_entries=se,
    # Bars are 15-minute bars (see the resample step); freq must match them or
    # duration-based and annualised statistics are computed on the wrong scale.
    freq='15min',
    td_stop=2,
    time_delta_format=0,
    # Stop-loss distance: 4x the per-bar 'natr' column.
    sl_stop=4*os_df['natr'],
    #tp_stop=3.75*os_df['natr'],
    slippage=0.0001,
)

In [None]:
# Summary statistics of the out-of-sample portfolio.
pf.stats()

In [None]:
# Plot the out-of-sample portfolio value over time.
pf.value.plot()

In [None]:
# Per-trade table, enriched with entry/exit timestamps and the return expressed
# in multiples of the stop distance at entry (4x natr, matching sl_stop above).
records = pf.trades.records
entry_pos = records['entry_idx']
exit_pos = records['exit_idx']
records['dt'] = os_df.index[entry_pos]
records['exit_dt'] = os_df.index[exit_pos]
natr_at_entry = os_df['natr'].iloc[entry_pos].values
records['sl'] = 4 * natr_at_entry
records['realized_r'] = records['return'] / records['sl']
records = records.set_index('dt')

In [None]:
# Cumulative realized R over trades, rendered as a static PNG.
records['realized_r'].cumsum().vbt.plot().show(renderer='png')

# Rolling Weekly Train-Predict

In [None]:
# Running week counter: increments on every row whose weekday number is lower
# than the previous row's (i.e. the weekday wrapped around).
weekday = df.index.to_series().dt.weekday
df['weeknum'] = weekday.diff().lt(0).cumsum()

In [None]:
# Number of weeks skipped before the first prediction week.
training_window = 52

# Hoisted out of the loop: the feature columns are the same for every week.
feature_cols = utils.get_prefixed_cols(df, 'x_')

df['prediction_label'] = np.nan
df['prediction_score'] = np.nan

# NOTE(review): despite the section title, nothing is refit here - every week is
# predicted with the already-finalized `final_model`. The previous `train_df`
# slice (weeks week-training_window .. week-1) was computed but never used and
# has been removed; reintroduce it if a per-week refit is added.
for week in range(training_window+2, df['weeknum'].max()+1):
    week_mask = df['weeknum'] == week
    if not week_mask.any():
        continue
    week_features = df.loc[week_mask, feature_cols]

    # Write through df.loc instead of df[col].update(...): the chained form acts
    # on an intermediate Series and is silently lost under pandas copy-on-write.
    df.loc[week_mask, 'prediction_label'] = final_model.predict(week_features).values
    df.loc[week_mask, 'prediction_score'] = final_model.predict_proba(week_features)[:,1]

# Convert to the probability of the predicted label: p for +1, 1 - p otherwise
# (rows without a prediction stay NaN).
df['prediction_score'] = np.where(df['prediction_label']==1, df['prediction_score'], 1-df['prediction_score'])

In [None]:
# Entry conditions: predicted direction with a score above 0.8.
high_score = df['prediction_score'] > 0.8
le = (df['prediction_label'] == 1) & high_score
se = (df['prediction_label'] == -1) & high_score

# Pass both conditions through the project's crossover helper at the 0.5 level.
le, se = utils.crossover(le, 0.5), utils.crossover(se, 0.5)

# Backtest over the whole frame with natr-scaled stop-loss (3x) and take-profit (5x).
signal_kwargs = dict(
    entries=le,
    short_entries=se,
    freq='15min',
    td_stop=2,
    time_delta_format=0,
    sl_stop=3*df['natr'],
    tp_stop=5*df['natr'],
    slippage=0.0001,
)
pf = vbt.Portfolio.from_signals(
    df['close'], open=df['open'], high=df['high'], low=df['low'],
    **signal_kwargs,
)

In [None]:
# Summary statistics of the rolling-prediction portfolio.
pf.stats()

In [None]:
# Plot the rolling-prediction portfolio value over time.
pf.value.plot()

In [None]:
# Per-trade table, enriched with entry/exit timestamps and the return expressed
# in multiples of the stop distance at entry (3x natr, matching sl_stop above).
records = pf.trades.records
entry_pos = records['entry_idx']
exit_pos = records['exit_idx']
records['dt'] = df.index[entry_pos]
records['exit_dt'] = df.index[exit_pos]
#records['sl'] = 0.01
natr_at_entry = df['natr'].iloc[entry_pos].values
records['sl'] = 3 * natr_at_entry
records['realized_r'] = records['return'] / records['sl']
records = records.set_index('dt')

In [None]:
# Cumulative realized R over trades, rendered as a static PNG.
records['realized_r'].cumsum().vbt.plot().show(renderer='png')