In [1]:
import vectorbt as vb
import numpy as np
import pandas as pd

from datetime import datetime
from utils import binance_resample, yf_resample, pt_to_strength
from signals import OscillatorSignals


start_date = datetime(2021, 6, 1)

vb.settings.array_wrapper['freq'] = 'd'
vb.settings.plotting['layout']['template'] = 'vbt_dark'
vb.settings.portfolio['init_cash'] = 1000.0
vb.settings.portfolio['fees'] = 0.001
vb.settings.portfolio['slippage'] = 0.002

In [2]:
# Get SPY data
spy_raw = vb.YFData.download('SPY', start=start_date, end='now UTC', interval='1h')
spy = yf_resample(spy_raw.data['SPY'], '4H')

# Backtest

In [3]:
use = 'BTCUSDT'

bdata = vb.BinanceData.download(use, start=start_date, end='now UTC', interval='1h')
data = pd.concat({i: binance_resample(x, '4H') for i, x in bdata.data.items()}, axis=1)

2021-05-31 17:00:00+00:00 - 2022-02-10 21:00:00+00:00: : 13it [00:08,  1.51it/s]


In [4]:
price = data[f'{use}'].close
vol = data[f'{use}'].volume
logvol = np.log(1 + vol).rename('logvol')
ntrades = data[f'{use}'].ntrades

# Peak Finding: Annotations

Three ideas for annotations:
* Directional (1 and -1): Could use both classifier and regressor
* Peak strength: Depending on order of peak
* Convolve a kernel by peak strength: Gaussian kernel, perhaps?

In [5]:
from find import ExtremeFind

ef = ExtremeFind()
iprice = ef.annotate_direction(price)

Stopped at 6th degree in peak finding.
Stopped at 6th degree in trough finding.


## Ideal Portfolio

In [6]:
# Buys on trough level 2+ and sells on peak level 2+
portfolio = vb.Portfolio.from_signals(price, iprice['trough'] > 1, iprice['peak'] > 1)
portfolio.stats()

Start                         2021-05-31 16:00:00+00:00
End                           2022-02-10 20:00:00+00:00
Period                               1532 days 00:00:00
Start Value                                      1000.0
End Value                                  69429.872723
Total Return [%]                            6842.987272
Benchmark Return [%]                           19.61701
Max Gross Exposure [%]                            100.0
Total Fees Paid                             3116.774092
Max Drawdown [%]                               7.847983
Max Drawdown Duration                  35 days 00:00:00
Total Trades                                         90
Total Closed Trades                                  89
Total Open Trades                                     1
Open Trade PnL                              1523.352698
Win Rate [%]                                  96.629213
Best Trade [%]                                 21.35879
Worst Trade [%]                               -4

In [7]:
rec = portfolio.trades.records

spy_close = spy.close.ffill().rename('spy_close')
spy_ret = spy_close.pct_change().rename('spy_ret')
spy_vol = spy.volume.ffill().rename('spy_vol')

## Semi-Ideal Portfolio
With proxy for ideal signal

In [8]:
# tp = iprice['trough'].replace(0, np.nan).interpolate() - iprice['peak'].replace(0, np.nan).interpolate()
# tpsig = tp.rolling(12).mean()

# Gaussian filter convolution on peak-trough value
tpsig = pt_to_strength(iprice.pt, sigma=2.5)
tpsig_thresh = 0.15

# Trough-peak signal
portfolio = vb.Portfolio.from_signals(price, tpsig < -tpsig_thresh, tpsig > tpsig_thresh)
portfolio.stats()

Start                          2021-05-31 16:00:00+00:00
End                            2022-02-10 20:00:00+00:00
Period                                1532 days 00:00:00
Start Value                                       1000.0
End Value                                    4713.136546
Total Return [%]                              371.313655
Benchmark Return [%]                            19.61701
Max Gross Exposure [%]                             100.0
Total Fees Paid                               232.188269
Max Drawdown [%]                               17.981166
Max Drawdown Duration                  196 days 00:00:00
Total Trades                                          47
Total Closed Trades                                   47
Total Open Trades                                      0
Open Trade PnL                                       0.0
Win Rate [%]                                   80.851064
Best Trade [%]                                 22.996325
Worst Trade [%]                

In [9]:
portfolio.trades.plot()

FigureWidget({
    'data': [{'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showl…

# Feature Engineering

In [21]:
# Set up base dataframe
usecols = [price, vol, logvol, ntrades, spy_close, spy_ret, spy_vol]
df = pd.concat(usecols, axis=1).ffill().reset_index()

# Standard peak/trough annotation
df['direction'] = 0
df.loc[(iprice['trough'] > 1).values, 'direction'] = 1
df.loc[(iprice['peak'] > 1).values, 'direction'] = -1
df['tp'] = tpsig.values

df.set_index('index', inplace=True)

## Features

In [22]:
osig = OscillatorSignals(price)
osig_spy = OscillatorSignals(spy_close)

df['rsi'] = osig.rsi
df['bband_pos'] = osig.bband_position()

# df['ma12h'] = df.close/df.close.rolling(3).mean()
df['ma2d'] = df.close/df.close.rolling(12).mean()
df['ma7d'] = df.close/df.close.rolling(42).mean()
# df['ntrades_ma12h'] = df.ntrades/df.ntrades.rolling(3).mean()
# df['ntrades_ma2d'] = df.ntrades/df.ntrades.rolling(12).mean()
df['ntrades_ma7d'] = df.ntrades/df.ntrades.rolling(42).mean()
# df['vol_ma12h'] = df.volume/df.volume.rolling(3).mean()
# df['vol_ma2d'] = df.volume/df.volume.rolling(12).mean()
df['logvol'] = df.logvol
df['vol_ma7d'] = df.volume/df.volume.rolling(42).mean()
df['vol2d'] = df.close.pct_change().rolling(12).std()
df['vol7d'] = df.close.pct_change().rolling(42).std()

df['spy_rsi'] = osig_spy.rsi
df['spy_bband_pos'] = osig_spy.bband_position()

# df['spy_ret_3d'] = df.spy_ret.rolling(18).sum()
df['spy_ret_7d'] = df.spy_ret.rolling(42).sum()
# df['spy_ma1d'] = df.spy_close/df.spy_close.rolling(6).mean()
df['spy_ma7d'] = df.spy_close/df.spy_close.rolling(42).mean()
df['spy_vol_ma7d'] = df.spy_vol/df.spy_vol.rolling(42).apply(lambda x: np.nanmean(x))
df['spy_active'] = df.spy_vol > 0
df['spy_vol7d'] = df.spy_close.pct_change().rolling(42).std()

In [23]:
def ts_cv_indexer(n, start=0.5, cv=5):
    '''
    Indexer for time-series cumulative cross-validation
    '''
    ranges = n*np.linspace(start, 1, cv+1)
    ranges = ranges.astype(int)
    out = []
    for i, x in enumerate(ranges):
        if i > 0:
            out.append([ranges[i-1], x])
    return out

In [24]:
use = 'tp'
ncols = len(usecols) + 2
# Feature columns after which column
features_after = 8

features = df.ffill().dropna()
cvprices = []
cvtrains, cvtests = [], []
indexes = ts_cv_indexer(len(df), start=0.4, cv=3)
for train_end, test_end in indexes:
    train, test = features.head(train_end), features.iloc[train_end:test_end]
    xtrain, ytrain = train.iloc[:, ncols:], getattr(train, use)
    xtest, ytest = test.iloc[:, ncols:], getattr(test, use)
    cvprices.append([train.close, test.close])
    cvtrains.append([xtrain, ytrain])
    cvtests.append([xtest, ytest])

In [25]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score

In [26]:
models = [
    RandomForestRegressor(n_estimators=100, max_depth=4),
    GradientBoostingRegressor(n_estimators=100, subsample=0.6, learning_rate=2e-2, max_depth=4, warm_start=False),
    # ExtraTreesRegressor(max_depth=4),
    MLPRegressor(hidden_layer_sizes=(64, 16))
]

result = []
preds = []
var_ratio = []
for (xtrain, ytrain), (xtest, ytest) in zip(cvtrains, cvtests):
    trains = []
    temp = []
    for model in models:
        model.fit(xtrain, ytrain)
        trains.append(model.predict(xtrain))
        temp.append(model.predict(xtest))
    tpred = np.mean(trains, axis=0)
    pred = np.mean(temp, axis=0)
    train_score, test_score = r2_score(ytrain, tpred), r2_score(ytest, pred)
    result.append({'train': train_score, 'test': test_score})
    preds.append(pred)
    var_ratio.append([pred.std()/ytest.std()])

pd.DataFrame(result)

Unnamed: 0,train,test
0,0.604269,0.25765
1,0.572694,0.211716
2,0.523854,0.077726


In [28]:
# Geometric mean of CV variance ratios
print(np.exp(np.log(var_ratio).mean()))

# Adjust threshold with variance ratio
variance_adj_ratio = 0.6
thresh = variance_adj_ratio*tpsig_thresh

0.5906829589362724


In [29]:
trades = []
allstats = []
for p, (_, pr) in zip(preds, cvprices):
    buy = pd.Series(p).vbt.crossed_above(thresh)
    sell = pd.Series(p).vbt.crossed_below(-thresh)
    
    portfolio = vb.Portfolio.from_signals(pr, buy, sell)
    s = portfolio.stats()
    trades.append(portfolio.trades)
    allstats.append(s)

# CV Results

In [30]:
pd.concat(allstats, axis=1)

Unnamed: 0,0,1,2
Start,2021-09-18 12:00:00+00:00,2021-11-08 16:00:00+00:00,2021-12-29 16:00:00+00:00
End,2021-11-08 12:00:00+00:00,2021-12-29 12:00:00+00:00,2022-02-10 20:00:00+00:00
Period,307 days 00:00:00,306 days 00:00:00,260 days 00:00:00
Start Value,1000.0,1000.0,1000.0
End Value,1320.283066,794.395596,946.387646
Total Return [%],32.028307,-20.56044,-5.361235
Benchmark Return [%],37.105078,-27.152195,-6.476098
Max Gross Exposure [%],100.0,100.0,100.0
Total Fees Paid,11.626255,10.429759,9.755998
Max Drawdown [%],19.260574,20.56044,19.243821


In [31]:
trades[2].plot()

FigureWidget({
    'data': [{'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showl…

In [None]:
metrics = ['Total Return [%]', 'Benchmark Return [%]', 'Max Drawdown [%]', 'Total Trades', 'Win Rate [%]', 'Sharpe Ratio', 'Sortino Ratio']

stats_df = portfolio.stats()
stats_df[metrics]

In [None]:
portfolio.trades.plot()

In [None]:
p = pd.Series(model.predict(xtest))
buy = p.vbt.crossed_above(tpsig_thresh)
sell = p.vbt.crossed_below(-tpsig_thresh)

portfolio = vb.Portfolio.from_signals(test.close, buy, sell)
portfolio.stats()

In [None]:
portfolio.trades.plot()

In [None]:
p.plot()