In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import f1_score, precision_score, recall_score
import matplotlib.pyplot as plt

In [2]:
data_file_suffix = '20210801f'
clf = AdaBoostClassifier(random_state=42)
model_file_prefix = 'nm_ab_ethbtc_'

In [3]:
def train_test_split(X, y, train_idx=None, test_idx=None):
    X_train = X.loc[train_idx]
    y_train = y.loc[train_idx]
    X_test = X.loc[test_idx]
    y_test = y.loc[test_idx]
    return (X_train, y_train, X_test, y_test)


def load_split_data(suffix=None, split=False, window=14):
    if suffix==None:
        suffix='DEFAULT'
        
    X = pd.read_pickle(f'data/X_{suffix}.pkl')
    y = pd.read_pickle(f'data/y_{suffix}.pkl').buy
    
    # Drop NA rows:
    na_rows = X.isna().any(axis=1)
    X = X[~na_rows]
    y = y[~na_rows]
        
    if split:
        X_train, y_train, X_test, y_test = train_test_split(X, y, X.loc['2018':'2020'].index, X.loc['2021':].index)
        return X_train, y_train, X_test, y_test
    else:
        return X, y
    
X_train, y_train, X_test, y_test = load_split_data(suffix=data_file_suffix, split=True)
#X_train, y_train = load_split_data(suffix=data_file_suffix, split=False)

# Remove all past columns

In [4]:
import re

# Drop columns with lookbacks equal to or greater than X
x = 1

columns = list(X_train.columns)
for c in X_train.columns:
    if m := re.match(r'^.*_([0-9]+)$', c):
        if int(m[1]) >= x:
            columns.remove(c)
print(columns)

['open', 'high', 'low', 'close', 'dow', 'tod', 'number_of_trades', 'volume', 'quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ma14', 'ma30', 'ma90', 'sup14', 'sup30', 'sup90', 'res14', 'res30', 'res90', 'atr', 'atr_diff', 'atr_ma14', 'rsi', 'rsi_diff', 'rsi_ma14', 'trend_up', 'trend_up3', 'trend_up14', 'trend_up30', 'cs_ss', 'cs_ssr', 'cs_hm', 'cs_hmr', 'cs_brh', 'cs_buh', 'cs_ebu', 'cs_ebr']


In [5]:
%time clf = clf.fit(X_train[columns], y_train)
pred=clf.predict(X_test[columns])

print(f'Precision = {round(precision_score(y_test, pred),4)}')
print(f'Recall = {round(recall_score(y_test, pred),4)}')
print(f'F1-Score = {round(f1_score(y_test, pred),4)}')

Wall time: 10.8 s
Precision = 0.0
Recall = 0.0
F1-Score = 0.0


  _warn_prf(average, modifier, msg_start, len(result))


# Keep only 4 past columns

In [6]:
import re

# Drop columns with lookbacks equal to or greater than X
x = 4

columns = list(X_train.columns)
for c in X_train.columns:
    if m := re.match(r'^.*_([0-9]+)$', c):
        if int(m[1]) >= x:
            columns.remove(c)
print(columns)

['open', 'high', 'low', 'close', 'dow', 'tod', 'number_of_trades', 'volume', 'quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ma14', 'ma30', 'ma90', 'sup14', 'sup30', 'sup90', 'res14', 'res30', 'res90', 'atr', 'atr_diff', 'atr_ma14', 'rsi', 'rsi_diff', 'rsi_ma14', 'trend_up', 'trend_up3', 'trend_up14', 'trend_up30', 'cs_ss', 'cs_ssr', 'cs_hm', 'cs_hmr', 'cs_brh', 'cs_buh', 'cs_ebu', 'cs_ebr', 'open_1', 'open_2', 'open_3', 'high_1', 'high_2', 'high_3', 'low_1', 'low_2', 'low_3', 'close_1', 'close_2', 'close_3', 'number_of_trades_1', 'number_of_trades_2', 'number_of_trades_3', 'volume_1', 'volume_2', 'volume_3', 'quote_asset_volume_1', 'quote_asset_volume_2', 'quote_asset_volume_3', 'taker_buy_base_asset_volume_1', 'taker_buy_base_asset_volume_2', 'taker_buy_base_asset_volume_3', 'taker_buy_quote_asset_volume_1', 'taker_buy_quote_asset_volume_2', 'taker_buy_quote_asset_volume_3', 'ma14_1', 'ma14_2', 'ma14_3', 'ma30_1', 'ma30_2', 'ma30_3', 'ma90_1', 'm

In [7]:
%time clf = clf.fit(X_train[columns], y_train)
pred=clf.predict(X_test[columns])

print(f'Precision = {round(precision_score(y_test, pred),4)}')
print(f'Recall = {round(recall_score(y_test, pred),4)}')
print(f'F1-Score = {round(f1_score(y_test, pred),4)}')

Wall time: 40 s
Precision = 0.0
Recall = 0.0
F1-Score = 0.0


  _warn_prf(average, modifier, msg_start, len(result))


# Keep All columns

In [8]:
columns = list(X_train.columns)

In [9]:
%time clf = clf.fit(X_train, y_train)
pred=clf.predict(X_test)

print(f'Precision = {round(precision_score(y_test, pred),4)}')
print(f'Recall = {round(recall_score(y_test, pred),4)}')
print(f'F1-Score = {round(f1_score(y_test, pred),4)}')

Wall time: 2min 27s
Precision = 0.5263
Recall = 0.0016
F1-Score = 0.0031


# Package Model

In [10]:
print('Number of Trades', pred.sum())
print('Percent Trades', pred.sum()/len(pred))

Number of Trades 19
Percent Trades 0.0009488139825218477


In [11]:
import pickle
import datetime
timestamp = datetime.datetime.now().strftime('%y%m%d%H%M')
print(f'Pickle model file timestamp: {timestamp}')

Pickle model file timestamp: 2108021552


In [12]:
filename = f'models/{model_file_prefix}{timestamp}.pkl'
pickle.dump(clf, open(filename, 'wb'))
print('Saved', filename)

Saved models/nm_ab_ethbtc_2108021552.pkl


In [15]:
print(columns)

['open', 'high', 'low', 'close', 'dow', 'tod', 'number_of_trades', 'volume', 'quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ma14', 'ma30', 'ma90', 'sup14', 'sup30', 'sup90', 'res14', 'res30', 'res90', 'atr', 'atr_diff', 'atr_ma14', 'rsi', 'rsi_diff', 'rsi_ma14', 'trend_up', 'trend_up3', 'trend_up14', 'trend_up30', 'cs_ss', 'cs_ssr', 'cs_hm', 'cs_hmr', 'cs_brh', 'cs_buh', 'cs_ebu', 'cs_ebr', 'open_1', 'open_2', 'open_3', 'high_1', 'high_2', 'high_3', 'low_1', 'low_2', 'low_3', 'close_1', 'close_2', 'close_3', 'number_of_trades_1', 'number_of_trades_2', 'number_of_trades_3', 'volume_1', 'volume_2', 'volume_3', 'quote_asset_volume_1', 'quote_asset_volume_2', 'quote_asset_volume_3', 'taker_buy_base_asset_volume_1', 'taker_buy_base_asset_volume_2', 'taker_buy_base_asset_volume_3', 'taker_buy_quote_asset_volume_1', 'taker_buy_quote_asset_volume_2', 'taker_buy_quote_asset_volume_3', 'ma14_1', 'ma14_2', 'ma14_3', 'ma30_1', 'ma30_2', 'ma30_3', 'ma90_1', 'm

# Analyze Model

In [14]:
date = '2021-06-25'
df = pd.DataFrame(y_test)
df['pred'] = pred
df[df.pred==1].loc[date]

Unnamed: 0_level_0,buy,pred
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-06-25 01:15:00,1,1
2021-06-25 14:00:00,1,1
