In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier

# Raise pandas' display limit to 500 columns so wide frames are shown without elided columns.
pd.set_option('display.max_columns', 500)

def load_split_data(suffix=None, split=False, window=14):
    """Load the cached feature/label frames, building and caching them on a miss.

    The frames are stored as ``data/X_{suffix}.pkl`` and ``data/y_{suffix}.pkl``.
    When either file is missing, both frames are rebuilt with ``build_Xy`` from
    the notebook-level ``df`` and written back to those two paths.

    Parameters
    ----------
    suffix : str, optional
        Cache key embedded in the pickle file names. ``None`` selects
        ``'DEFAULT'``.
    split : bool, default False
        When true, pass the frames through ``train_test_split`` together with
        the ``'2018':'2019'`` and ``'2020':`` index slices of ``X`` and return
        its four results.
    window : int, default 14
        Forwarded to ``build_Xy`` on a cache miss only. It is NOT part of the
        cache key, so a cache hit ignores it; use a distinct ``suffix`` per
        window value.

    Returns
    -------
    tuple
        ``(X, y)`` when ``split`` is false, otherwise
        ``(X_train, y_train, X_test, y_test)``.

    Raises
    ------
    Exception
        Any error other than a missing cache file (for example a corrupt
        pickle, or an interrupt) propagates instead of triggering a rebuild.

    Notes
    -----
    ``build_Xy``, ``df`` and ``train_test_split`` are not defined or imported
    in this cell and must already exist in the kernel.
    NOTE(review): the four-argument ``train_test_split`` call does not match
    scikit-learn's function of the same name, so this is presumably a project
    helper -- confirm.
    Unpickling executes code from the file; only load caches you trust.
    """
    import os  # local import so the cell does not depend on another cell's imports

    if suffix is None:
        suffix = 'DEFAULT'
    try:
        X = pd.read_pickle(f'data/X_{suffix}.pkl')
        y = pd.read_pickle(f'data/y_{suffix}.pkl')
    except FileNotFoundError:
        # Cache miss: only a missing file triggers the rebuild, so unrelated
        # failures are not hidden behind a silent recomputation.
        X, y, _ = build_Xy(df, window=window, use_atr=True, atr_ratio=(20,5), reverse=False, debug=True)
        # Make sure the cache directory exists before writing into it.
        os.makedirs('data', exist_ok=True)
        X.to_pickle(f'data/X_{suffix}.pkl')
        y.to_pickle(f'data/y_{suffix}.pkl')

    if split:
        X_train, y_train, X_test, y_test = train_test_split(X, y, X.loc['2018':'2019'].index, X.loc['2020':].index)
        return X_train, y_train, X_test, y_test
    else:
        return X, y
    
# Load (or build and cache) the unsplit feature and label frames stored under the '20210801f' suffix.
X, y = load_split_data(suffix='20210801f')

In [13]:
# Spot-check a single cell of the feature frame: the 'low' column at one timestamp label.
X.loc['2017-07-14 07:45:00','low']

0.088591

# Remove all past columns

In [2]:
import re

# Lookback threshold: a column named "<base>_<N>" is discarded when N >= x.
# With x = 1 every numbered lookback column is discarded.
x = 1

# Keep a column unless its name ends in "_<digits>" and that number reaches the threshold.
columns = [
    name
    for name in X.columns
    if not ((hit := re.match(r'^.*_([0-9]+)$', name)) and int(hit[1]) >= x)
]
print(columns)

['open', 'high', 'low', 'close', 'dow', 'tod', 'number_of_trades', 'volume', 'quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ma14', 'ma30', 'ma90', 'sup14', 'sup30', 'sup90', 'res14', 'res30', 'res90', 'atr', 'atr_diff', 'atr_ma14', 'rsi', 'rsi_diff', 'rsi_ma14', 'trend_up', 'trend_up3', 'trend_up14', 'trend_up30', 'cs_ss', 'cs_ssr', 'cs_hm', 'cs_hmr', 'cs_brh', 'cs_buh', 'cs_ebu', 'cs_ebr']


In [4]:
# Grid-search the boosting learning rate on rows from 2018 onward, restricted to
# the selected columns, ranking candidates by precision on the 'buy' label.
parameters = {
    'learning_rate': [0.01, 0.03, 0.06, 0.1],
}
c = GradientBoostingClassifier(random_state=42)
clf = GridSearchCV(
    estimator=c,
    param_grid=parameters,
    scoring='precision',
    n_jobs=-1,
    verbose=4,
).fit(
    X.loc['2018':][columns],
    y.loc['2018':].buy,
)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


KeyboardInterrupt: 

In [4]:
# Tabulate the per-candidate cross-validation results held by the current `clf`.
# NOTE(review): the saved output below lists max_depth candidates, not the
# learning_rate grid of the preceding search cell, and that cell's saved output
# ends in KeyboardInterrupt -- the table appears to come from an earlier search;
# re-run both cells to refresh it.
pd.DataFrame(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.515322,0.562378,0.035791,0.001594,5,{'max_depth': 5},0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
1,1.992131,0.068801,0.040429,0.008658,10,{'max_depth': 10},0.305503,0.354167,0.0,0.333333,0.0,0.198601,0.16289,2
2,2.858094,0.093413,0.046232,0.010947,20,{'max_depth': 20},0.271414,0.299425,0.341346,0.249493,0.441441,0.320624,0.06776,1


# Keep only past columns with lookback 1-3 (drop lookbacks >= 4)

In [7]:
import re

# Lookback threshold: a column named "<base>_<N>" is discarded when N >= x.
# With x = 4 only lookbacks 1 to 3 survive alongside the unnumbered columns.
x = 4

# Keep a column unless its name ends in "_<digits>" and that number reaches the threshold.
columns = [
    name
    for name in X.columns
    if not ((hit := re.match(r'^.*_([0-9]+)$', name)) and int(hit[1]) >= x)
]
print(columns)

['open', 'high', 'low', 'close', 'dow', 'tod', 'number_of_trades', 'volume', 'quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ma14', 'ma30', 'ma90', 'sup14', 'sup30', 'sup90', 'res14', 'res30', 'res90', 'atr', 'atr_diff', 'atr_ma14', 'rsi', 'rsi_diff', 'rsi_ma14', 'trend_up', 'trend_up3', 'trend_up14', 'trend_up30', 'cs_ss', 'cs_ssr', 'cs_hm', 'cs_hmr', 'cs_brh', 'cs_buh', 'cs_ebu', 'cs_ebr', 'open_1', 'open_2', 'open_3', 'high_1', 'high_2', 'high_3', 'low_1', 'low_2', 'low_3', 'close_1', 'close_2', 'close_3', 'number_of_trades_1', 'number_of_trades_2', 'number_of_trades_3', 'volume_1', 'volume_2', 'volume_3', 'quote_asset_volume_1', 'quote_asset_volume_2', 'quote_asset_volume_3', 'taker_buy_base_asset_volume_1', 'taker_buy_base_asset_volume_2', 'taker_buy_base_asset_volume_3', 'taker_buy_quote_asset_volume_1', 'taker_buy_quote_asset_volume_2', 'taker_buy_quote_asset_volume_3', 'ma14_1', 'ma14_2', 'ma14_3', 'ma30_1', 'ma30_2', 'ma30_3', 'ma90_1', 'm

In [9]:
# Grid-search the boosting learning rate on rows from 2018 onward, restricted to
# the selected columns, ranking candidates by precision on the 'buy' label.
parameters = {
    'learning_rate': [0.01, 0.03, 0.06, 0.1],
}
c = GradientBoostingClassifier(random_state=42)
clf = GridSearchCV(
    estimator=c,
    param_grid=parameters,
    scoring='precision',
    n_jobs=-1,
    verbose=4,
).fit(
    X.loc['2018':][columns],
    y.loc['2018':].buy,
)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END ................................n_estimators=10; total time=   2.9s
[CV 2/5] END ................................n_estimators=10; total time=   2.4s
[CV 3/5] END ................................n_estimators=10; total time=   1.5s
[CV 4/5] END ................................n_estimators=10; total time=   1.7s
[CV 5/5] END ................................n_estimators=10; total time=   1.6s
[CV 1/5] END ...............................n_estimators=100; total time=   8.9s
[CV 2/5] END ...............................n_estimators=100; total time=   8.9s
[CV 3/5] END ...............................n_estimators=100; total time=   8.7s
[CV 4/5] END ...............................n_estimators=100; total time=   9.3s
[CV 5/5] END ...............................n_estimators=100; total time=   9.4s
[CV 1/5] END ...............................n_estimators=500; total time=  43.0s
[CV 2/5] END ...............................n_est

In [10]:
# Tabulate the per-candidate cross-validation results held by the current `clf`.
# NOTE(review): the saved output below lists n_estimators candidates (10/100/500),
# not the learning_rate grid of the preceding search cell -- it appears to come
# from an earlier version of that cell; re-run to refresh it.
pd.DataFrame(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,2.089923,0.557724,0.032965,0.002483,10,{'n_estimators': 10},0.291142,0.272385,0.276531,0.281983,0.368819,0.298172,0.035877,1
1,9.078121,0.272135,0.069016,0.008681,100,{'n_estimators': 100},0.282807,0.274531,0.245445,0.271926,0.396518,0.294245,0.052648,2
2,43.605615,1.147657,0.23546,0.026942,500,{'n_estimators': 500},0.274211,0.266943,0.264609,0.257717,0.397878,0.292272,0.053065,3


# Keep All columns

In [9]:
# Grid-search two learning rates on rows from 2018 onward using every column of X,
# ranking candidates by precision on the 'buy' label.
parameters = {
    'learning_rate': [0.01, 0.03],
}
c = GradientBoostingClassifier(random_state=42)
clf = GridSearchCV(
    estimator=c,
    param_grid=parameters,
    scoring='precision',
    n_jobs=-1,
    verbose=4,
).fit(
    X.loc['2018':],
    y.loc['2018':].buy,
)

Fitting 5 folds for each of 2 candidates, totalling 10 fits


In [8]:
# Tabulate the per-candidate cross-validation results held by the current `clf`.
# NOTE(review): the saved output below lists learning_rate 0.03, 0.06 and 0.1,
# whereas the preceding search cell's grid is [0.01, 0.03] -- the table appears
# to come from an earlier run of that cell; re-run to refresh it.
pd.DataFrame(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1074.066483,10.256935,0.137831,0.01394,0.03,{'learning_rate': 0.03},0.319563,0.526718,1.0,0.591463,0.495238,0.586596,0.225462,1
1,1050.64731,21.095573,0.184706,0.053946,0.06,{'learning_rate': 0.06},0.298128,0.505405,0.685714,0.568063,0.451613,0.501785,0.128241,2
2,1047.492748,16.032119,0.173735,0.029217,0.1,{'learning_rate': 0.1},0.295175,0.502607,0.571429,0.535859,0.426923,0.466399,0.098023,3


In [10]:
# Tabulate the per-candidate cross-validation results held by the current `clf`.
# The saved output below lists learning_rate 0.01 and 0.03, matching the
# all-columns grid defined above.
# NOTE(review): learning_rate 0.01 scores 0.0 precision in every fold --
# presumably the model predicted no positive samples; confirm before comparing.
pd.DataFrame(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,917.073078,12.539258,0.140225,0.021077,0.01,{'learning_rate': 0.01},0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
1,906.46684,15.822165,0.145218,0.019352,0.03,{'learning_rate': 0.03},0.319563,0.526718,1.0,0.591463,0.495238,0.586596,0.225462,1


In [10]:
# Tabulate the per-candidate cross-validation results held by the current `clf`.
# NOTE(review): the saved output below lists max_depth candidates (3 to 7), but no
# search cell with a max_depth grid is visible in this section -- the table
# cannot be reproduced from the code shown here.
pd.DataFrame(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,3.169878,0.049901,0.086912,0.002649,3,{'max_depth': 3},0.0,0.0,0.0,0.00266,0.0,0.000532,0.001064,5
1,3.75976,0.060456,0.10834,0.038687,4,{'max_depth': 4},0.0,0.0,0.0,0.004149,0.0,0.00083,0.00166,4
2,4.395449,0.091613,0.091219,0.004289,5,{'max_depth': 5},0.0,0.0,0.0,0.018554,0.0,0.003711,0.007422,3
3,4.994452,0.232629,0.083677,0.001196,6,{'max_depth': 6},0.0,0.0,0.0,0.030282,0.0,0.006056,0.012113,1
4,5.643846,0.105016,0.082601,0.000935,7,{'max_depth': 7},0.0,0.0,0.0,0.029875,0.0,0.005975,0.01195,2


In [19]:
# Tabulate the per-candidate cross-validation results held by the current `clf`.
# NOTE(review): the saved output below lists n_estimators candidates (500 to 1000),
# but no search cell with that grid is visible in this section -- the table
# cannot be reproduced from the code shown here.
pd.DataFrame(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,65.865451,3.137005,0.27115,0.038348,500,{'n_estimators': 500},0.323741,0.136223,0.294727,0.176502,0.33871,0.253981,0.081946,1
1,82.42946,3.592737,0.343688,0.038921,625,{'n_estimators': 625},0.318359,0.136208,0.294879,0.174526,0.344262,0.253647,0.082645,2
2,97.400635,4.000107,0.394655,0.050284,750,{'n_estimators': 750},0.323467,0.133975,0.294872,0.171475,0.328767,0.250511,0.081538,4
3,117.318607,5.299878,0.466274,0.061899,875,{'n_estimators': 875},0.31185,0.131119,0.29395,0.172603,0.305556,0.243016,0.075792,5
4,139.94318,12.713331,0.552024,0.080395,1000,{'n_estimators': 1000},0.307203,0.132584,0.292386,0.174342,0.359375,0.253178,0.085431,3
