In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, precision_score, recall_score
import matplotlib.pyplot as plt

In [2]:
# Suffix of the pickled dataset to load; it selects data/X_<suffix>.pkl and
# data/y_<suffix>.pkl when passed to load_split_data().
# NOTE(review): looks like a YYYYMMDD date stamp — confirm.
data_file_suffix = '20210726'

In [3]:
def train_test_split(X, y, train_idx=None, test_idx=None):
    """Slice features and targets into train and test parts by index label.

    Parameters
    ----------
    X, y
        Label-indexable objects (anything exposing ``.loc``) holding the
        features and the targets respectively.
    train_idx, test_idx
        Label selectors forwarded unchanged to ``.loc`` on both ``X`` and
        ``y``. NOTE(review): the ``None`` defaults are forwarded as-is too;
        the caller in this notebook always supplies both.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.
    """
    train_features, train_target = X.loc[train_idx], y.loc[train_idx]
    test_features, test_target = X.loc[test_idx], y.loc[test_idx]
    return (train_features, train_target, test_features, test_target)


def load_split_data(suffix=None, split=False, window=14,
                    train_end='2019', test_start='2020', data_dir='data'):
    """Load the pickled feature/target pair and optionally split it by index label.

    Reads ``{data_dir}/X_{suffix}.pkl`` (features) and the ``buy`` attribute
    of ``{data_dir}/y_{suffix}.pkl`` (target), then removes every row in
    which any feature is NA, applying the same row mask to the target.

    Parameters
    ----------
    suffix : str, optional
        File-name suffix of the dataset; ``None`` selects ``'DEFAULT'``.
    split : bool, default False
        When true, return a train/test split instead of the full data.
    window : int, default 14
        Currently unused; retained so existing callers keep working.
    train_end : label, default '2019'
        Upper bound of the label slice ``X.loc[:train_end]`` used for training.
    test_start : label, default '2020'
        Lower bound of the label slice ``X.loc[test_start:]`` used for testing.
    data_dir : str, default 'data'
        Directory that contains the pickle files.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` when ``split`` is true,
        otherwise ``(X, y)``.
    """
    if suffix is None:
        suffix = 'DEFAULT'

    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at files that come from a trusted source.
    X = pd.read_pickle(f'{data_dir}/X_{suffix}.pkl')
    y = pd.read_pickle(f'{data_dir}/y_{suffix}.pkl').buy

    # Drop NA rows, using one mask so features and target stay aligned.
    na_rows = X.isna().any(axis=1)
    X = X[~na_rows]
    y = y[~na_rows]

    if not split:
        return X, y

    # The slice bounds are labels, so the split relies on the index supporting
    # label slicing with these values.
    X_train, y_train, X_test, y_test = train_test_split(
        X, y, X.loc[:train_end].index, X.loc[test_start:].index
    )
    return X_train, y_train, X_test, y_test
    
# Load the dataset selected by data_file_suffix and split it into train and
# test parts.
X_train, y_train, X_test, y_test = load_split_data(
    suffix=data_file_suffix,
    split=True,
)
# Unsplit alternative, kept disabled:
#X_train, y_train = load_split_data(suffix=data_file_suffix, split=False)

In [4]:
# Gradient-boosted classifier with a fixed seed; only the learning rate is
# overridden here.
clf = GradientBoostingClassifier(random_state=0, learning_rate=0.06)
# Assign the result of fit() back to clf so the cell displays nothing.
clf = clf.fit(X_train, y_train)

In [5]:
# Score the classifier on the test split. precision_score, recall_score and
# f1_score are already imported in the notebook's first cell, so they are not
# re-imported here.
pred = clf.predict(X_test)

# One loop instead of three copy-pasted print lines; the printed text is
# unchanged.
for metric_name, metric_fn in (
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1-Score', f1_score),
):
    print(f'{metric_name} = {round(metric_fn(y_test, pred), 4)}')

Precision = 0.5926
Recall = 0.0736
F1-Score = 0.1309


In [13]:
import pickle
import datetime

# Minute-resolution stamp of the current local time (two-digit year, month,
# day, hour, minute); the model-saving cell puts it in the pickle file name.
timestamp = format(datetime.datetime.now(), '%y%m%d%H%M')
print(f'Pickle model file timestamp: {timestamp}')

Pickle model file timestamp: 2107261358


In [10]:
# Persist the fitted classifier. The context manager closes (and therefore
# flushes) the file handle even if pickling fails; the bare open() call used
# previously never closed it.
with open(f'models/nm_grad_boost_ethbtc_{timestamp}.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [16]:
# Show the training feature names in column order.
print(X_train.columns.tolist())

['open', 'high', 'low', 'close', 'rsi_13', 'ma30_8', 'atr_diff_9', 'cs_hm_9', 'rsi_diff_8', 'low_5', 'high_10', 'low_7', 'cs_hm_14', 'open_5', 'cs_ebu', 'rsi_2', 'atr_ma14_1', 'close_4', 'high_2', 'cs_hmr_9', 'rsi_diff_6', 'ma90', 'open_3', 'trend_up14_13', 'tod', 'atr_3', 'trend_up3_1', 'cs_ss_8', 'ma14_12', 'atr_diff_4', 'trend_up30_10', 'trend_up3_11', 'rsi_ma14_5', 'atr_ma14_2', 'rsi_ma14_9', 'close_13', 'close_12', 'trend_up14_2', 'cs_ssr_5', 'rsi_ma14_14', 'ma30_9', 'trend_up14_8', 'trend_up30_5', 'rsi_ma14_7', 'high_11', 'cs_hm_2', 'rsi_diff', 'cs_ss_7', 'cs_ssr_13', 'cs_ebu_6', 'cs_ebu_8', 'cs_brh_13', 'cs_brh_9', 'number_of_trades', 'ma30', 'atr_11', 'rsi_diff_4', 'cs_ebu_4', 'ma90_5', 'high_13', 'ma90_4', 'cs_ss_3', 'cs_hmr_8', 'cs_ebu_2', 'cs_buh_2', 'trend_up_3', 'trend_up30_2', 'rsi_diff_14', 'atr_ma14_10', 'trend_up_12', 'rsi_diff_5', 'rsi_diff_7', 'trend_up_7', 'atr_diff_6', 'cs_buh_1', 'low_10', 'atr_12', 'cs_ss_10', 'atr_10', 'cs_brh_3', 'cs_ssr_11', 'cs_ebr_11', 'tren