In [1]:
from dataset import Dataset
import vectorbt as vbt
import numpy as np
from sklearn.ensemble import IsolationForest

import warnings
warnings.simplefilter(action='ignore')
vbt.settings.set_theme('dark')


In [104]:
TICKER = 'BTCUSDT'
data_binance = Dataset().get_data(days=360, ticker=TICKER, ts='1h')

### Creating Features

In [105]:
cols = []
data = data_binance.copy()
for i in range(0, 10):
    col = f'lag_{i}'
    col_vol = f'lag_vol_{i}'
    cols.append(col)
    cols.append(col_vol)
    data[col] = data['close'].pct_change().shift(i)
    data[col_vol] = data['volume'].pct_change().shift(i)
data = data.dropna()


### Preparing data for a machine learning model. Creation of training and test set

In [106]:
split_size = 0.8
split_len = round(len(data) * split_size)
train = data[:split_len]
test = data[split_len:]
print(len(train), len(test))


6904 1726


### Model optimization
Finding balances between overfitting and generalization

In [110]:
results = []
estimators = np.arange(50, 500, 50)

for n in estimators:
    model = IsolationForest(n_estimators=n, contamination=.03)
    model.fit(train[cols])
    test_predictions = model.predict(test[cols])
    train_predictions = model.predict(train[cols])
    train['outlier'] = np.where(train_predictions == -1, True, False)
    test['outlier'] = np.where(test_predictions == -1, True, False)

    pf_train = vbt.Portfolio.from_signals(
        train['close'],
        np.where(train['outlier'] == True, True, False),
        np.where(train['outlier'].shift(1) == True, True, False)
    )

    pf_test = vbt.Portfolio.from_signals(
        test['close'],
        np.where(test['outlier'] == True, True, False),
        np.where(test['outlier'].shift(1) == True, True, False)
    )
    stats_train = pf_train.stats()
    stats_test = pf_test.stats()
    results.append({
        'estimators': n,
        'train_returns': stats_train['Total Return [%]'],
        'train_profit_factor': stats_train['Profit Factor'],
        'test_returns': stats_test['Total Return [%]'],
        'test_profit_factor': stats_test['Profit Factor'],
        'portfolio': pf_test
    })

### Testing the best model

In [111]:
name_f = 'test_returns'
max_res = max(i[name_f] for i in results)
print(max_res)
best_params = list(
    filter(lambda i: (i[name_f] == max_res), results)
)
best_params[0]['portfolio'].plot(height=400, width=1300).show()


15.695309517641803


In [112]:
best_params[0]['portfolio'].stats()


Start                         2022-01-06 14:00:00
End                           2022-03-19 11:00:00
Period                           71 days 22:00:00
Start Value                                 100.0
End Value                               115.69531
Total Return [%]                         15.69531
Benchmark Return [%]                     -3.50628
Max Gross Exposure [%]                      100.0
Total Fees Paid                               0.0
Max Drawdown [%]                         2.106898
Max Drawdown Duration            23 days 22:00:00
Total Trades                                   27
Total Closed Trades                            27
Total Open Trades                               0
Open Trade PnL                                0.0
Win Rate [%]                            66.666667
Best Trade [%]                           3.637941
Worst Trade [%]                         -1.115368
Avg Winning Trade [%]                    1.050124
Avg Losing Trade [%]                    -0.456945
