In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import mplfinance as mpf
import numpy as np

pd.set_option('display.max_columns', 500)

def train_test_split(X, y, train_idx=None, test_idx=None):
    X_train = X.loc[train_idx]
    y_train = y.loc[train_idx]
    X_test = X.loc[test_idx]
    y_test = y.loc[test_idx]
    return (X_train, y_train, X_test, y_test)

def load_split_data(suffix=None, split=False, window=14):
    if suffix==None:
        suffix='DEFAULT'
    try:
        X = pd.read_pickle(f'data/X_{suffix}.pkl')
        y = pd.read_pickle(f'data/y_{suffix}.pkl')
    except:
        X, y, _ = build_Xy(df, window=window, use_atr=True, atr_ratio=(20,5), reverse=False, debug=True)
        X.to_pickle(f'data/X_{suffix}.pkl')
        y.to_pickle(f'data/y_{suffix}.pkl')
        
    if split:
        X_train, y_train, X_test, y_test = train_test_split(X, y, X.loc['2018':'2020'].index, X.loc['2021':].index)
        return X_train, y_train, X_test, y_test
    else:
        return X, y
    
data_file_suffix = '20210801f'
X, y = load_split_data(suffix=data_file_suffix)
X_train, y_train, X_test, y_test = load_split_data(suffix=data_file_suffix, split=True)

In [5]:
y_train.sum()/len(y_train)

buy    0.5
dtype: float64

In [4]:
# Balance data...
add_buys = (y_train.buy==0).sum() - (y_train.buy==1).sum()

y_toadd = y_train[y_train.buy==1].sample(n=add_buys, replace=True, random_state=42)
x_toadd = X_train[y_train.buy==1].sample(n=add_buys, replace=True, random_state=42)

X_train = pd.concat([X_train,x_toadd])
y_train = pd.concat([y_train,y_toadd])

# Check Various Classifiers

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

def print_scores(clf,X,y):
    print(f"2017 acc = {clf.score(X.loc['2017'], y.loc['2017'])} - " + \
          f"f1 = {f1_score(y.loc['2017'],clf.predict(X.loc['2017']))} - " + \
          f"prec = {precision_score(y.loc['2017'],clf.predict(X.loc['2017']), zero_division=0)} - " + \
          f"recl = {recall_score(y.loc['2017'],clf.predict(X.loc['2017']), zero_division=0)}"
         )
    print(f"2018 acc = {clf.score(X.loc['2018'], y.loc['2018'])} - " + \
          f"f1 = {f1_score(y.loc['2018'],clf.predict(X.loc['2018']))} - " + \
          f"prec = {precision_score(y.loc['2018'],clf.predict(X.loc['2018']), zero_division=0)} - " + \
          f"recl = {recall_score(y.loc['2017'],clf.predict(X.loc['2017']), zero_division=0)}"
         )
    print(f"2019 acc = {clf.score(X.loc['2019'], y.loc['2019'])} - " + \
          f"f1 = {f1_score(y.loc['2019'],clf.predict(X.loc['2019']))} - " + \
          f"prec = {precision_score(y.loc['2019'],clf.predict(X.loc['2019']), zero_division=0)} - " + \
          f"recl = {recall_score(y.loc['2017'],clf.predict(X.loc['2017']), zero_division=0)}"
         )
    print(f"2020 acc = {clf.score(X.loc['2020'], y.loc['2020'])} - " + \
          f"f1 = {f1_score(y.loc['2020'],clf.predict(X.loc['2020']))} - " + \
          f"prec = {precision_score(y.loc['2020'],clf.predict(X.loc['2020']), zero_division=0)} - " + \
          f"recl = {recall_score(y.loc['2017'],clf.predict(X.loc['2017']), zero_division=0)}"
         )
    print(f"2021 acc = {clf.score(X.loc['2021'], y.loc['2021'])} - " + \
          f"f1 = {f1_score(y.loc['2021'],clf.predict(X.loc['2021']))} - " + \
          f"prec = {precision_score(y.loc['2021'],clf.predict(X.loc['2021']), zero_division=0)} - " + \
          f"recl = {recall_score(y.loc['2021'],clf.predict(X.loc['2021']), zero_division=0)}"
         )


clfs = {'Logistic Regression' : LogisticRegression(random_state=42, max_iter=10000),
        'Naive Bayes' : GaussianNB(),
        'Random Forest' : RandomForestClassifier(random_state=42, n_jobs=-1),
        'AdaBoost' : AdaBoostClassifier(random_state=42),
        'Gradient Boost': GradientBoostingClassifier(random_state=42),
        'XGBoost' : xgb.XGBClassifier(n_jobs=-1, random_state=42, use_label_encoder=False),
        'MLPClassifier (pipeline)' : make_pipeline(StandardScaler(),MLPClassifier(random_state=42)),
       }

In [4]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)

for k,clf in clfs.items():
    print(f'{k}:')
    %time clf.fit(X_train, y_train)
    print_scores(clf,X,y)
    print(' ')

Logistic Regression:
Wall time: 2min
2017 acc = 0.6926557818137344 - f1 = 0.08049762166117819 - prec = 0.16430171769977595 - recl = 0.05330748727889508
2018 acc = 0.5045233924007008 - f1 = 0.4319768207559595 - prec = 0.3215686274509804 - recl = 0.05330748727889508
2019 acc = 0.636142370357644 - f1 = 0.29651774345346843 - prec = 0.28344623200677393 - recl = 0.05330748727889508
2020 acc = 0.6582415701489132 - f1 = 0.3221681566142356 - prec = 0.32348596750369274 - recl = 0.05330748727889508
2021 acc = 0.4569787765293383 - f1 = 0.4352342370416537 - prec = 0.3257658218006531 - recl = 0.6555068836045056
 
Naive Bayes:
Wall time: 1.18 s
2017 acc = 0.5882712652112763 - f1 = 0.26615803814713895 - prec = 0.24187797147385104 - recl = 0.2958565543978677
2018 acc = 0.495103248226543 - f1 = 0.4259779272513551 - prec = 0.31582260094896875 - recl = 0.2958565543978677
2019 acc = 0.6151819717664576 - f1 = 0.32170796951496494 - prec = 0.28460439364172174 - recl = 0.2958565543978677
2020 acc = 0.617989387

### Data: 20210801f
Using `X, y = load_split_data(suffix='20210801f')`
**NOTE** Same as previous but balances data

<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 2min
    2017 acc = 0.6926557818137344 - f1 = 0.08049762166117819 - prec = 0.16430171769977595 - recl = 0.05330748727889508
    2018 acc = 0.5045233924007008 - f1 = 0.4319768207559595 - prec = 0.3215686274509804 - recl = 0.05330748727889508
    2019 acc = 0.636142370357644 - f1 = 0.29651774345346843 - prec = 0.28344623200677393 - recl = 0.05330748727889508
    2020 acc = 0.6582415701489132 - f1 = 0.3221681566142356 - prec = 0.32348596750369274 - recl = 0.05330748727889508
    2021 acc = 0.4569787765293383 - f1 = 0.4352342370416537 - prec = 0.3257658218006531 - recl = 0.6555068836045056

    Naive Bayes:
    Wall time: 1.18 s
    2017 acc = 0.5882712652112763 - f1 = 0.26615803814713895 - prec = 0.24187797147385104 - recl = 0.2958565543978677
    2018 acc = 0.495103248226543 - f1 = 0.4259779272513551 - prec = 0.31582260094896875 - recl = 0.2958565543978677
    2019 acc = 0.6151819717664576 - f1 = 0.32170796951496494 - prec = 0.28460439364172174 - recl = 0.2958565543978677
    2020 acc = 0.6179893878016774 - f1 = 0.35237220099627603 - prec = 0.3086241951880718 - recl = 0.2958565543978677
    2021 acc = 0.5366292134831461 - f1 = 0.42498605688789737 - prec = 0.3518727552591072 - recl = 0.5364518147684606

    Random Forest:
    Wall time: 17.1 s
    2017 acc = 0.7279398275545771 - f1 = 0.05199232900063925 - prec = 0.21554770318021202 - recl = 0.02956142476375091
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.02956142476375091
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.02956142476375091
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.02956142476375091
    2021 acc = 0.6516354556803995 - f1 = 0.23171806167400885 - prec = 0.3913690476190476 - recl = 0.16458072590738423

    AdaBoost:
    Wall time: 3min 32s
    2017 acc = 0.6694796061884669 - f1 = 0.18143268211419053 - prec = 0.24192245557350567 - recl = 0.1451417494548098
    2018 acc = 0.5194003274074499 - f1 = 0.47420348143027713 - prec = 0.3452914798206278 - recl = 0.1451417494548098
    2019 acc = 0.6048735790166939 - f1 = 0.3925694413875071 - prec = 0.31619628421500495 - recl = 0.1451417494548098
    2020 acc = 0.6088891424659097 - f1 = 0.40556711758584807 - prec = 0.3295750827989571 - recl = 0.1451417494548098
    2021 acc = 0.4648689138576779 - f1 = 0.4407098121085595 - prec = 0.3306704260651629 - recl = 0.6605131414267835

    Gradient Boost:
    Wall time: 18min 33s
    2017 acc = 0.643796245337247 - f1 = 0.21464203856006472 - prec = 0.2419452887537994 - recl = 0.19287618124545675
    2018 acc = 0.5440133260576122 - f1 = 0.5108598539696233 - prec = 0.3687019166629608 - recl = 0.19287618124545675
    2019 acc = 0.6398648455172808 - f1 = 0.44965650024066867 - prec = 0.3608652900688299 - recl = 0.19287618124545675
    2020 acc = 0.6274890169452845 - f1 = 0.4528159570901777 - prec = 0.3604162497498499 - recl = 0.19287618124545675
    2021 acc = 0.45578027465667914 - f1 = 0.45808055693684735 - prec = 0.3357632307916606 - recl = 0.7205882352941176

    XGBoost:
    [23:40:20] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 52.7 s
    2017 acc = 0.694857212743839 - f1 = 0.14584046559397468 - prec = 0.24839650145772596 - recl = 0.10322267991276957
    2018 acc = 0.8090697607628019 - f1 = 0.731350521296371 - prec = 0.6124949235142818 - recl = 0.10322267991276957
    2019 acc = 0.8477794003951551 - f1 = 0.7413390424289606 - prec = 0.6381837982742733 - recl = 0.10322267991276957
    2020 acc = 0.8369658241570149 - f1 = 0.7335540118420439 - prec = 0.6255566157760815 - recl = 0.10322267991276957
    2021 acc = 0.5212484394506867 - f1 = 0.4066348950919106 - prec = 0.33640552995391704 - recl = 0.5139236545682102

    MLPClassifier (pipeline):
    Wall time: 7min 15s
    2017 acc = 0.6877637130801688 - f1 = 0.19029495718363462 - prec = 0.27535566773749426 - recl = 0.1453840562151684
    2018 acc = 0.8300353255406531 - f1 = 0.7328940241920926 - prec = 0.666365725541694 - recl = 0.1453840562151684
    2019 acc = 0.8234401397359906 - f1 = 0.6826230183240682 - prec = 0.6132433182280588 - recl = 0.1453840562151684
    2020 acc = 0.8347977406287442 - f1 = 0.7069183663140848 - prec = 0.6415579643578909 - recl = 0.1453840562151684
    2021 acc = 0.575330836454432 - f1 = 0.3288082083662195 - prec = 0.33179356482956357 - recl = 0.32587609511889865
    ```
</details>

### Data: 20210801f
Using `X, y = load_split_data(suffix='20210801f')`
**NOTE** Uses 0.01,0.005 threshold_ratio

<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 2min 57s
    2017 acc = 0.7476304042071791 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.7128292024469399 - f1 = 0.0015976035946080878 - prec = 0.18604651162790697 - recl = 0.0
    2019 acc = 0.7532285313403774 - f1 = 0.001159017153453871 - prec = 0.38461538461538464 - recl = 0.0
    2020 acc = 0.7463342271923318 - f1 = 0.0024680278214045323 - prec = 0.2682926829268293 - recl = 0.0
    2021 acc = 0.6759051186017478 - f1 = 0.022590361445783132 - prec = 0.3024193548387097 - recl = 0.011733416770963704

    Naive Bayes:
    Wall time: 883 ms
    2017 acc = 0.6316883752216719 - f1 = 0.22433998712169995 - prec = 0.2394172622319956 - recl = 0.2110491882723528
    2018 acc = 0.5456790832591402 - f1 = 0.4059037818755399 - prec = 0.32446712698889224 - recl = 0.2110491882723528
    2019 acc = 0.6551842625204021 - f1 = 0.274578313253012 - prec = 0.28541014402003756 - recl = 0.2110491882723528
    2020 acc = 0.6571860557996234 - f1 = 0.3164002502986518 - prec = 0.319434872501723 - recl = 0.2110491882723528
    2021 acc = 0.5695380774032459 - f1 = 0.3970341354224958 - prec = 0.3590587044534413 - recl = 0.44399249061326657

    Random Forest:
    Wall time: 15.3 s
    2017 acc = 0.5420412156790803 - f1 = 0.3202323681583008 - prec = 0.2560232220609579 - recl = 0.4274291252725951
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.4274291252725951
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.4274291252725951
    2020 acc = 0.9999714725851543 - f1 = 0.9999436460974923 - prec = 1.0 - recl = 0.4274291252725951
    2021 acc = 0.6550312109862672 - f1 = 0.16710875331564987 - prec = 0.3643533123028391 - recl = 0.10841677096370463

    AdaBoost:
    Wall time: 2min 33s
    2017 acc = 0.7454289732770746 - f1 = 0.012336892052194544 - prec = 0.29545454545454547 - recl = 0.006299975769323964
    2018 acc = 0.714925758924725 - f1 = 0.017616785431512273 - prec = 0.6742424242424242 - recl = 0.006299975769323964
    2019 acc = 0.7540016607966097 - f1 = 0.011506155793349442 - prec = 0.6578947368421053 - recl = 0.006299975769323964
    2020 acc = 0.7470474125634735 - f1 = 0.002025886325267304 - prec = 0.75 - recl = 0.006299975769323964
    2021 acc = 0.6808489388264669 - f1 = 0.0031196381219778507 - prec = 0.5263157894736842 - recl = 0.0015644555694618273

    Gradient Boost:
    Wall time: 12min 47s
    2017 acc = 0.7174218797774109 - f1 = 0.08185972580965625 - prec = 0.22737306843267108 - recl = 0.049915192633874485
    2018 acc = 0.7200091903845601 - f1 = 0.04580600959185671 - prec = 0.9551020408163265 - recl = 0.049915192633874485
    2019 acc = 0.7554620164361595 - f1 = 0.018390804597701146 - prec = 0.9411764705882353 - recl = 0.049915192633874485
    2020 acc = 0.7482170365721458 - f1 = 0.010759919300605245 - prec = 0.9795918367346939 - recl = 0.049915192633874485
    2021 acc = 0.6802496878901373 - f1 = 0.0006243171531137817 - prec = 0.13333333333333333 - recl = 0.00031289111389236547

    XGBoost:
    [18:17:41] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 20min 3s
    2017 acc = 0.7183391426649545 - f1 = 0.09863013698630137 - prec = 0.25635808748728384 - recl = 0.06106130361037073
    2018 acc = 0.8457164191964157 - f1 = 0.64267659970733 - prec = 0.9543658632951403 - recl = 0.06106130361037073
    2019 acc = 0.8418520745640409 - f1 = 0.5393277170739845 - prec = 0.9582098399525786 - recl = 0.06106130361037073
    2020 acc = 0.8349689051178182 - f1 = 0.5294069795818759 - prec = 0.9514619883040936 - recl = 0.06106130361037073
    2021 acc = 0.6649687890137328 - f1 = 0.10247491638795986 - prec = 0.3536472760849492 - recl = 0.059918648310387984

    MLPClassifier (pipeline):
    Wall time: 5min 38s
    2017 acc = 0.7124686601846756 - f1 = 0.12177810982443033 - prec = 0.2656886715566422 - recl = 0.07899200387690816
    2018 acc = 0.838450271403544 - f1 = 0.6748742847234264 - prec = 0.7965616045845272 - recl = 0.07899200387690816
    2019 acc = 0.842940182687627 - f1 = 0.6299669432638467 - prec = 0.7520940721649485 - recl = 0.07899200387690816
    2020 acc = 0.8503166543047869 - f1 = 0.6641490110734174 - prec = 0.7685925925925926 - recl = 0.07899200387690816
    2021 acc = 0.6127340823970038 - f1 = 0.2724458204334365 - prec = 0.3402859151628779 - recl = 0.22715894868585731
    ```
</details>

### Data: 20210801e
Using `X, y = load_split_data(suffix='20210801e')`
**NOTE** Uses 0.005,0.0025 threshold_ratio

<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 1min 58s
    2017 acc = 0.6508897450009172 - f1 = 0.06055619549119631 - prec = 0.5609756097560976 - recl = 0.03200556618542355
    2018 acc = 0.5911140469284012 - f1 = 0.15901707129777307 - prec = 0.5578118524658102 - recl = 0.03200556618542355
    2019 acc = 0.5925035077169774 - f1 = 0.07764599131505606 - prec = 0.5245183887915937 - recl = 0.03200556618542355
    2020 acc = 0.6051235237062818 - f1 = 0.0618137454249695 - prec = 0.5428571428571428 - recl = 0.03200556618542355
    2021 acc = 0.581772784019975 - f1 = 0.12769503176752423 - prec = 0.5 - recl = 0.07319402985074627

    Naive Bayes:
    Wall time: 851 ms
    2017 acc = 0.3830489818381948 - f1 = 0.5208264070292092 - prec = 0.3582255324709264 - recl = 0.9537310836667247
    2018 acc = 0.47767023751400095 - f1 = 0.5726035767160953 - prec = 0.4345329386168278 - recl = 0.9537310836667247
    2019 acc = 0.43381152821922514 - f1 = 0.5609999777980063 - prec = 0.410808350133316 - recl = 0.9537310836667247
    2020 acc = 0.452159525303817 - f1 = 0.5297747306562194 - prec = 0.40176780806655277 - recl = 0.9537310836667247
    2021 acc = 0.5249937578027466 - f1 = 0.49613306494332016 - prec = 0.4458726078263353 - recl = 0.5591641791044776

    Random Forest:
    Wall time: 15.4 s
    2017 acc = 0.5553721029780468 - f1 = 0.37710956909106486 - prec = 0.37153950033760974 - recl = 0.38284919116368066
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.38284919116368066
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.38284919116368066
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.38284919116368066
    2021 acc = 0.5822721598002497 - f1 = 0.290921420700178 - prec = 0.5014611338398597 - recl = 0.2048955223880597

    AdaBoost:
    Wall time: 2min 28s
    2017 acc = 0.6340121078701155 - f1 = 0.1367373431414972 - prec = 0.40033783783783783 - recl = 0.08244912158636285
    2018 acc = 0.6055601826588931 - f1 = 0.32063711911357345 - prec = 0.5685964912280702 - recl = 0.08244912158636285
    2019 acc = 0.5985453712453112 - f1 = 0.21297855619175932 - prec = 0.5378508647575844 - recl = 0.08244912158636285
    2020 acc = 0.6125121241513094 - f1 = 0.19897387509583062 - prec = 0.5543871179756819 - recl = 0.08244912158636285
    2021 acc = 0.5821223470661673 - f1 = 0.23116501286291802 - prec = 0.5013949780789159 - recl = 0.15020895522388059

    Gradient Boost:
    Wall time: 14min 15s
    2017 acc = 0.6365804439552376 - f1 = 0.1251288090681584 - prec = 0.407088122605364 - recl = 0.07392590015654897
    2018 acc = 0.6194319193543755 - f1 = 0.31993841416474217 - prec = 0.6272891929965788 - recl = 0.07392590015654897
    2019 acc = 0.6107722704235031 - f1 = 0.2004587965413799 - prec = 0.627855563743552 - recl = 0.07392590015654897
    2020 acc = 0.6204142180635591 - f1 = 0.18508084272415484 - prec = 0.6259320629660314 - recl = 0.07392590015654897
    2021 acc = 0.5900624219725343 - f1 = 0.2463967685669696 - prec = 0.5329626687847498 - recl = 0.16023880597014925

    XGBoost:
    [16:48:08] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 54.3 s
    2017 acc = 0.5436922888766588 - f1 = 0.4093715371220516 - prec = 0.375599128540305 - recl = 0.4498173595407897
    2018 acc = 0.7836813234153767 - f1 = 0.7032542746828461 - prec = 0.8213694091662065 - recl = 0.4498173595407897
    2019 acc = 0.7821779343126306 - f1 = 0.6819948998787676 - prec = 0.8466888104629438 - recl = 0.4498173595407897
    2020 acc = 0.7675871512523536 - f1 = 0.6406739293432717 - prec = 0.8292042470601667 - recl = 0.4498173595407897
    2021 acc = 0.5801248439450687 - f1 = 0.3503322515839901 - prec = 0.49638712502737026 - recl = 0.2706865671641791

    MLPClassifier (pipeline):
    Wall time: 5min 43s
    2017 acc = 0.6008071913410383 - f1 = 0.3205661948376353 - prec = 0.3990671158331174 - recl = 0.26787267350843624
    2018 acc = 0.7812401275165858 - f1 = 0.7321259011781256 - prec = 0.7478267116890581 - recl = 0.26787267350843624
    2019 acc = 0.761303438994359 - f1 = 0.6903187458206406 - prec = 0.7355712136806271 - recl = 0.26787267350843624
    2020 acc = 0.7735779083699436 - f1 = 0.6890743134720101 - prec = 0.7573409110479635 - recl = 0.26787267350843624
    2021 acc = 0.5486641697877653 - f1 = 0.4275399037243476 - prec = 0.45528126264670177 - recl = 0.40298507462686567
    ```
</details>

### Data: 20210801d
Using `X, y = load_split_data(suffix='20210801d')`
**NOTE** Same as previous but with `4,1` ratio instead of `4,2`


<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 1min 41s
    2017 acc = 0.840946615299945 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.8133777535253741 - f1 = 0.0003076923076923077 - prec = 0.5 - recl = 0.0
    2019 acc = 0.8140480485639836 - f1 = 0.0009230769230769232 - prec = 0.5 - recl = 0.0
    2020 acc = 0.8042734067438808 - f1 = 0.000582665695557174 - prec = 0.4 - recl = 0.0
    2021 acc = 0.7984019975031211 - f1 = 0.001977750309023486 - prec = 0.4 - recl = 0.0009913258983890955

    Naive Bayes:
    Wall time: 783 ms
    2017 acc = 0.2564667033571822 - f1 = 0.27413288758880067 - prec = 0.16226148409893992 - recl = 0.8827374086889658
    2018 acc = 0.38355495562767455 - f1 = 0.33175591531755916 - prec = 0.2079462961517446 - recl = 0.8827374086889658
    2019 acc = 0.3170689803281505 - f1 = 0.32344264155225233 - prec = 0.19824048960289312 - recl = 0.8827374086889658
    2020 acc = 0.40309237176927026 - f1 = 0.33265293104548066 - prec = 0.21290928390626276 - recl = 0.8827374086889658
    2021 acc = 0.5509612983770287 - f1 = 0.328454070201643 - prec = 0.23506146445750936 - recl = 0.5449814126394052

    Random Forest:
    Wall time: 14.6 s
    2017 acc = 0.8322020424386962 - f1 = 0.04256803907885555 - prec = 0.23018867924528302 - recl = 0.023452518262206845
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.023452518262206845
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.023452518262206845
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.023452518262206845
    2021 acc = 0.7958052434456929 - f1 = 0.057182384136499884 - prec = 0.4105960264900662 - recl = 0.030731102850061958

    AdaBoost:
    Wall time: 2min 27s
    2017 acc = 0.8403962575674189 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.8140670323673856 - f1 = 0.010091743119266056 - prec = 0.7857142857142857 - recl = 0.0
    2019 acc = 0.8139621452910689 - f1 = 0.0006152899553914783 - prec = 0.2857142857142857 - recl = 0.0
    2020 acc = 0.8043019341587265 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2021 acc = 0.798501872659176 - f1 = 0.0 - prec = 0.0 - recl = 0.0

    Gradient Boost:
    Wall time: 13min 6s
    2017 acc = 0.8340365682137834 - f1 = 0.029327610872675255 - prec = 0.21025641025641026 - recl = 0.015763168012302962
    2018 acc = 0.8166805479766794 - f1 = 0.03594623168705633 - prec = 0.967479674796748 - recl = 0.015763168012302962
    2019 acc = 0.8142484895341179 - f1 = 0.0024604028909733966 - prec = 0.8888888888888888 - recl = 0.015763168012302962
    2020 acc = 0.8043875164032636 - f1 = 0.0008742532420224392 - prec = 1.0 - recl = 0.015763168012302962
    2021 acc = 0.7984519350811485 - f1 = 0.0 - prec = 0.0 - recl = 0.0

    XGBoost:
    [14:56:58] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 52.2 s
    2017 acc = 0.8193603620130863 - f1 = 0.07687499999999999 - prec = 0.20534223706176963 - recl = 0.04728950403690888
    2018 acc = 0.8918406617076883 - f1 = 0.5989350372736955 - prec = 0.9723374827109267 - recl = 0.04728950403690888
    2019 acc = 0.875382985425078 - f1 = 0.5035363906000455 - prec = 0.9713908450704225 - recl = 0.04728950403690888
    2020 acc = 0.8689450561990072 - f1 = 0.507609860664523 - prec = 0.9587044534412955 - recl = 0.04728950403690888
    2021 acc = 0.7950561797752809 - f1 = 0.0397753860552176 - prec = 0.35564853556485354 - recl = 0.021065675340768277

    MLPClassifier (pipeline):
    Wall time: 5min 46s
    2017 acc = 0.818259646548034 - f1 = 0.08101422387136673 - prec = 0.20695102685624012 - recl = 0.050365244136870435
    2018 acc = 0.8865849105373503 - f1 = 0.6820706867401981 - prec = 0.7151781191963532 - recl = 0.050365244136870435
    2019 acc = 0.865704550010022 - f1 = 0.48450208837107056 - prec = 0.8463901689708141 - recl = 0.050365244136870435
    2020 acc = 0.8758772180065042 - f1 = 0.5750561578279129 - prec = 0.8712636874815034 - recl = 0.050365244136870435
    2021 acc = 0.7270411985018727 - f1 = 0.19923820685613827 - prec = 0.24364027230383375 - recl = 0.16852540272614622
    ```
</details>

### Data: 20210801c
Using `X, y = load_split_data(suffix='20210801c')`
**NOTE** Same as previous but with `4,2` ratio instead of `2,1`


<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 2min 3s
    2017 acc = 0.8145294441386901 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.7776788534995261 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2019 acc = 0.775248403630845 - f1 = 0.0005093594804533299 - prec = 0.4 - recl = 0.0
    2020 acc = 0.7647629371826326 - f1 = 0.0004848484848484849 - prec = 0.6666666666666666 - recl = 0.0
    2021 acc = 0.7615480649188514 - f1 = 0.0012549675800041832 - prec = 0.3333333333333333 - recl = 0.0006286672254819782

    Naive Bayes:
    Wall time: 831 ms
    2017 acc = 0.26013575490735646 - f1 = 0.31415452638739305 - prec = 0.18969058050383353 - recl = 0.9136168809759314
    2018 acc = 0.39601941468738333 - f1 = 0.3779210790983849 - prec = 0.24507020639914065 - recl = 0.9136168809759314
    2019 acc = 0.3396615411047161 - f1 = 0.378292400183323 - prec = 0.2399042571379723 - recl = 0.9136168809759314
    2020 acc = 0.4188109773492326 - f1 = 0.38552254561906196 - prec = 0.2565842299662759 - recl = 0.9136168809759314
    2021 acc = 0.549912609238452 - f1 = 0.36861646234676004 - prec = 0.2768599389666421 - recl = 0.5513411567476949

    Random Forest:
    Wall time: 14.7 s
    2017 acc = 0.7724576530300251 - f1 = 0.12714051137696458 - prec = 0.22032520325203253 - recl = 0.08935047807451368
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.08935047807451368
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.08935047807451368
    2020 acc = 0.9999714725851543 - f1 = 0.9999393682168192 - prec = 1.0 - recl = 0.08935047807451368
    2021 acc = 0.7633458177278402 - f1 = 0.11668219944082013 - prec = 0.5278246205733558 - recl = 0.06559094719195306

    AdaBoost:
    Wall time: 2min 29s
    2017 acc = 0.8144071424203511 - f1 = 0.0019730351857941467 - prec = 0.375 - recl = 0.0009891196834817012
    2018 acc = 0.7779660530170309 - f1 = 0.020027886931169983 - prec = 0.5302013422818792 - recl = 0.0009891196834817012
    2019 acc = 0.775792457692638 - f1 = 0.011363636363636364 - prec = 0.625 - recl = 0.0009891196834817012
    2020 acc = 0.7647344097677868 - f1 = 0.0 - prec = 0.0 - recl = 0.0009891196834817012
    2021 acc = 0.7623470661672909 - f1 = 0.006264355815410315 - prec = 0.8823529411764706 - recl = 0.003143336127409891

    Gradient Boost:
    Wall time: 12min 55s
    2017 acc = 0.8149575001528772 - f1 = 0.014332247557003257 - prec = 0.5945945945945946 - recl = 0.007253544345532476
    2018 acc = 0.7820730061173498 - f1 = 0.04505411527812735 - prec = 0.8689320388349514 - recl = 0.007253544345532476
    2019 acc = 0.7763078773301263 - f1 = 0.009634888438133874 - prec = 0.95 - recl = 0.007253544345532476
    2020 acc = 0.7653334854795458 - f1 = 0.0050798258345428155 - prec = 1.0 - recl = 0.007253544345532476
    2021 acc = 0.7616479400749063 - f1 = 0.000418848167539267 - prec = 0.3333333333333333 - recl = 0.00020955574182732607

    XGBoost:
    [13:55:48] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 54.5 s
    2017 acc = 0.8090870176725983 - f1 = 0.05106382978723405 - prec = 0.32684824902723736 - recl = 0.027695351137487636
    2018 acc = 0.8933628191504638 - f1 = 0.692708764379707 - prec = 0.963619617775731 - recl = 0.027695351137487636
    2019 acc = 0.8734931134209547 - f1 = 0.6183483068417416 - prec = 0.9600321888412017 - recl = 0.027695351137487636
    2020 acc = 0.8601871398413876 - f1 = 0.5901145772350925 - prec = 0.9509433962264151 - recl = 0.027695351137487636
    2021 acc = 0.7530586766541822 - f1 = 0.13077869572859904 - prec = 0.40567066521264994 - recl = 0.07795473595976529

    MLPClassifier (pipeline):
    Wall time: 5min 55s
    2017 acc = 0.7852381825964655 - f1 = 0.1228771228771229 - prec = 0.2533470648815654 - recl = 0.08110781404549951
    2018 acc = 0.868835980355553 - f1 = 0.6714152097273186 - prec = 0.7575905179412242 - recl = 0.08110781404549951
    2019 acc = 0.8473498840305815 - f1 = 0.5413404456680719 - prec = 0.8333774834437087 - recl = 0.08110781404549951
    2020 acc = 0.8545387117019456 - f1 = 0.5979657809666483 - prec = 0.8548241659152389 - recl = 0.08110781404549951
    2021 acc = 0.6999750312109863 - f1 = 0.20529100529100527 - prec = 0.2783357245337159 - recl = 0.16261525565800503
    ```
</details>

### Data: 20210801b
Using `X, y = load_split_data(suffix='20210801b')`


<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 1min 27s
    2017 acc = 0.6302819054607717 - f1 = 0.02389409105586051 - prec = 0.4539877300613497 - recl = 0.012269938650306749
    2018 acc = 0.6030615468566013 - f1 = 0.07619811509925808 - prec = 0.5302325581395348 - recl = 0.012269938650306749
    2019 acc = 0.6134925407324686 - f1 = 0.017040489367899796 - prec = 0.4957627118644068 - recl = 0.012269938650306749
    2020 acc = 0.6090888343698294 - f1 = 0.013959847449089732 - prec = 0.485 - recl = 0.012269938650306749
    2021 acc = 0.5917103620474407 - f1 = 0.05914844649021864 - prec = 0.5298969072164949 - recl = 0.031322364411943936

    Naive Bayes:
    Wall time: 898 ms
    2017 acc = 0.38708493854338655 - f1 = 0.5365086705202312 - prec = 0.3720020520713095 - recl = 0.9618637041949926
    2018 acc = 0.46316666188000805 - f1 = 0.5542306591624535 - prec = 0.41431933252513725 - recl = 0.9618637041949926
    2019 acc = 0.42516393207914555 - f1 = 0.5465633681928038 - prec = 0.39311823764499465 - recl = 0.9618637041949926
    2020 acc = 0.46211559308495465 - f1 = 0.5330724845843342 - prec = 0.40335032229051115 - recl = 0.9618637041949926
    2021 acc = 0.510561797752809 - f1 = 0.481675392670157 - prec = 0.4254484304932735 - recl = 0.5550274223034735

    Random Forest:
    Wall time: 16.8 s
    2017 acc = 0.5766526019690577 - f1 = 0.2860678560379499 - prec = 0.378341516639389 - recl = 0.22997844470237108
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.22997844470237108
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.22997844470237108
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.22997844470237108
    2021 acc = 0.5713857677902622 - f1 = 0.29837325267718473 - prec = 0.4530784508440914 - recl = 0.22242535039609995

    AdaBoost:
    Wall time: 2min 36s
    2017 acc = 0.6221488411912187 - f1 = 0.07927283564297422 - prec = 0.3911764705882353 - recl = 0.04410545514839993
    2018 acc = 0.6165024842758264 - f1 = 0.25356364246184804 - prec = 0.5665750686984762 - recl = 0.04410545514839993
    2019 acc = 0.6194771354121925 - f1 = 0.1338721240956788 - prec = 0.556036816459123 - recl = 0.04410545514839993
    2020 acc = 0.6125691789810007 - f1 = 0.09706801409480754 - prec = 0.5431547619047619 - recl = 0.04410545514839993
    2021 acc = 0.5912609238451935 - f1 = 0.11350590274017114 - prec = 0.5097276264591439 - recl = 0.06386349786715417

    Gradient Boost:
    Wall time: 13min 46s
    2017 acc = 0.6258790436005626 - f1 = 0.0628063725490196 - prec = 0.4124748490945674 - recl = 0.03399104626098491
    2018 acc = 0.6309486200063184 - f1 = 0.21579397046259005 - prec = 0.7072 - recl = 0.03399104626098491
    2019 acc = 0.6254903645162214 - f1 = 0.1125059374363846 - prec = 0.6680096696212732 - recl = 0.03399104626098491
    2020 acc = 0.6168482912078508 - f1 = 0.05661305050221255 - prec = 0.7462962962962963 - recl = 0.03399104626098491
    2021 acc = 0.5913607990012485 - f1 = 0.06512053010396436 - prec = 0.5200729927007299 - recl = 0.03473491773308958

    XGBoost:
    [11:59:25] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 53.5 s
    2017 acc = 0.6108970831040176 - f1 = 0.2047244094488189 - prec = 0.41573604060913705 - recl = 0.13579837506217873
    2018 acc = 0.82268301789253 - f1 = 0.7450867052023121 - prec = 0.8731372169537449 - recl = 0.13579837506217873
    2019 acc = 0.8117000257709819 - f1 = 0.7066642876260147 - prec = 0.8878054247926473 - recl = 0.13579837506217873
    2020 acc = 0.7746048953043875 - f1 = 0.634331466654325 - prec = 0.8663716814159292 - recl = 0.13579837506217873
    2021 acc = 0.5747315855181023 - f1 = 0.25350631136044877 - prec = 0.45145176397127695 - recl = 0.17623400365630712

    MLPClassifier (pipeline):
    Wall time: 5min 38s
    2017 acc = 0.5897388858313459 - f1 = 0.22876192665823658 - prec = 0.3729385307346327 - recl = 0.1649809318520975
    2018 acc = 0.7797179700738103 - f1 = 0.7159680047400385 - prec = 0.7369263607257204 - recl = 0.1649809318520975
    2019 acc = 0.7437791713197606 - f1 = 0.5705509694759071 - prec = 0.8098092643051771 - recl = 0.1649809318520975
    2020 acc = 0.7593997831916471 - f1 = 0.6090664688977473 - prec = 0.8340738859972071 - recl = 0.1649809318520975
    2021 acc = 0.540374531835206 - f1 = 0.3767605633802817 - prec = 0.4238915130275788 - recl = 0.33906154783668496
    ```
</details>

### Data: 20210801a
Using `X, y = load_split_data(suffix='20210801a')`


<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 1min 31s
    2017 acc = 0.7058643673943619 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.678566300008616 - f1 = 0.0024955436720142605 - prec = 0.4827586206896552 - recl = 0.0
    2019 acc = 0.6882570225925608 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2020 acc = 0.6859987447937468 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2021 acc = 0.6651685393258427 - f1 = 0.009747452370403191 - prec = 0.5238095238095238 - recl = 0.004919499105545617

    Naive Bayes:
    Wall time: 878 ms
    2017 acc = 0.3231822907111845 - f1 = 0.45354004147328925 - prec = 0.2973970473970474 - recl = 0.9548856548856549
    2018 acc = 0.4122461874264051 - f1 = 0.4757269117458691 - prec = 0.33346501939376527 - recl = 0.9548856548856549
    2019 acc = 0.3654038885548206 - f1 = 0.46649012999518535 - prec = 0.31607620538918246 - recl = 0.9548856548856549
    2020 acc = 0.4160152906943573 - f1 = 0.4571898284411211 - prec = 0.32278717987119965 - recl = 0.9548856548856549
    2021 acc = 0.5024719101123596 - f1 = 0.419439426606841 - prec = 0.34430307088874007 - recl = 0.5365235539654144

    Random Forest:
    Wall time: 17.6 s
    2017 acc = 0.6352962759126766 - f1 = 0.22905894519131337 - prec = 0.3028024606971975 - recl = 0.1841995841995842
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.1841995841995842
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.1841995841995842
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.1841995841995842
    2021 acc = 0.6345568039950062 - f1 = 0.13966611803432868 - prec = 0.3303670745272525 - recl = 0.0885509838998211

    AdaBoost:
    Wall time: 2min 30s
    2017 acc = 0.7022564667033572 - f1 = 0.009762050030506408 - prec = 0.22429906542056074 - recl = 0.00498960498960499
    2018 acc = 0.6812659754731613 - f1 = 0.031419095828242274 - prec = 0.6741573033707865 - recl = 0.00498960498960499
    2019 acc = 0.6883429258654755 - f1 = 0.0007344840249724568 - prec = 0.6666666666666666 - recl = 0.00498960498960499
    2020 acc = 0.6862554915273578 - f1 = 0.0030819434372733865 - prec = 0.6296296296296297 - recl = 0.00498960498960499
    2021 acc = 0.6641697877652933 - f1 = 0.0014847809948032667 - prec = 0.18518518518518517 - recl = 0.0007453786523553966

    Gradient Boost:
    Wall time: 13min 51s
    2017 acc = 0.704274445055953 - f1 = 0.01747257212515238 - prec = 0.38392857142857145 - recl = 0.00893970893970894
    2018 acc = 0.6851431689594761 - f1 = 0.04278355016152973 - prec = 0.9351145038167938 - recl = 0.00893970893970894
    2019 acc = 0.6901468945966841 - f1 = 0.012952658943719785 - prec = 0.922077922077922 - recl = 0.00893970893970894
    2020 acc = 0.6870257317281908 - f1 = 0.006159978258900263 - prec = 1.0 - recl = 0.00893970893970894
    2021 acc = 0.6642696629213484 - f1 = 0.0 - prec = 0.0 - recl = 0.0

    XGBoost:
    [10:40:10] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 55.9 s
    2017 acc = 0.69186082064453 - f1 = 0.11951773545343353 - prec = 0.37458926615553123 - recl = 0.0711018711018711
    2018 acc = 0.8084953617277923 - f1 = 0.5955846676370693 - prec = 0.9269397772323957 - recl = 0.0711018711018711
    2019 acc = 0.7827506227987286 - f1 = 0.48200996791151773 - prec = 0.938580164849774 - recl = 0.0711018711018711
    2020 acc = 0.7648199920123239 - f1 = 0.4182074805928017 - prec = 0.936176935229068 - recl = 0.0711018711018711
    2021 acc = 0.6528339575530587 - f1 = 0.12332912988650695 - prec = 0.4001636661211129 - recl = 0.07289803220035779

    MLPClassifier (pipeline):
    Wall time: 6min 8s
    2017 acc = 0.6235553109521189 - f1 = 0.2510948905109489 - prec = 0.30263929618768326 - recl = 0.21455301455301456
    2018 acc = 0.8071455239955196 - f1 = 0.6799180132513466 - prec = 0.7286473232529628 - recl = 0.21455301455301456
    2019 acc = 0.7990436102282163 - f1 = 0.6113203367301728 - prec = 0.7697350069735007 - recl = 0.21455301455301456
    2020 acc = 0.8073543675472129 - f1 = 0.6332482485200673 - prec = 0.786987041036717 - recl = 0.21455301455301456
    2021 acc = 0.5864169787765293 - f1 = 0.3123546994354035 - prec = 0.3525112443778111 - recl = 0.28041144901610016
    ```
</details>

### Data: 20210730d
Using `X, y = load_split_data(suffix='20210730d')`


<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 54.7 s
    2017 acc = 0.4778939644102 - f1 = 0.620567060705715 - prec = 0.46865351053832727 - recl = 0.9182009468700684
    2018 acc = 0.5384703753697694 - f1 = 0.6306256608283914 - prec = 0.5346272263143537 - recl = 0.9182009468700684
    2019 acc = 0.5621223835294792 - f1 = 0.6652217697797627 - prec = 0.5642082590612002 - recl = 0.9182009468700684
    2020 acc = 0.5525189707308724 - f1 = 0.6570166615647003 - prec = 0.5540435888925767 - recl = 0.9182009468700684
    2021 acc = 0.5291885143570537 - f1 = 0.5079331941544886 - prec = 0.5831036548831636 - recl = 0.44993065187239945

    Naive Bayes:
    Wall time: 776 ms
    2017 acc = 0.472757292239956 - f1 = 0.6288100568279663 - prec = 0.46742191500256014 - recl = 0.9604155707522357
    2018 acc = 0.5424911686148367 - f1 = 0.6509181750449227 - prec = 0.5344945478101271 - recl = 0.9604155707522357
    2019 acc = 0.5554219282421327 - f1 = 0.6880199332877869 - prec = 0.551973175135414 - recl = 0.9604155707522357
    2020 acc = 0.5556855137787414 - f1 = 0.6600606761682344 - prec = 0.5559191176470588 - recl = 0.9604155707522357
    2021 acc = 0.5368289637952559 - f1 = 0.5697453263441108 - prec = 0.5716812511636566 - recl = 0.5678224687933425

    Random Forest:
    Wall time: 12.6 s
    2017 acc = 0.513116859291873 - f1 = 0.36839600190385535 - prec = 0.4642143142742903 - recl = 0.30536559705418204
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.30536559705418204
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.30536559705418204
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.30536559705418204
    2021 acc = 0.5429712858926342 - f1 = 0.5496062992125985 - prec = 0.587480273540242 - recl = 0.516319926028664

    AdaBoost:
    Wall time: 2min 25s
    2017 acc = 0.5355592246071057 - f1 = 0.3238671770675688 - prec = 0.5012400110223202 - recl = 0.2392162019989479
    2018 acc = 0.5938424423446969 - f1 = 0.6471556886227545 - prec = 0.5833220887869384 - recl = 0.2392162019989479
    2019 acc = 0.5937061535377831 - f1 = 0.6578572978708012 - prec = 0.6003697020377624 - recl = 0.2392162019989479
    2020 acc = 0.5719461402407714 - f1 = 0.6385295463852955 - prec = 0.5788852974578492 - recl = 0.2392162019989479
    2021 acc = 0.5289887640449438 - f1 = 0.5223820133684424 - prec = 0.5774096048360013 - recl = 0.47693018955154876

    Gradient Boost:
    Wall time: 12min 39s
    2017 acc = 0.5377606555372103 - f1 = 0.4233732550156381 - prec = 0.5040871934604905 - recl = 0.36493950552340876
    2018 acc = 0.6331313363393549 - f1 = 0.6847171487807285 - prec = 0.6118928933786227 - recl = 0.36493950552340876
    2019 acc = 0.6305586576181886 - f1 = 0.6949593342160014 - prec = 0.6241824513717829 - recl = 0.36493950552340876
    2020 acc = 0.6081759570947681 - f1 = 0.677801496633747 - prec = 0.6016575045810428 - recl = 0.36493950552340876
    2021 acc = 0.5354806491885143 - f1 = 0.5355502296784501 - prec = 0.5821122327146424 - recl = 0.49588534442903376

    XGBoost:
    [20:57:19] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 39.5 s
    2017 acc = 0.523818259646548 - f1 = 0.3114333716508975 - prec = 0.47530364372469636 - recl = 0.23158863755917938
    2018 acc = 0.8405468278813292 - f1 = 0.8492451395677203 - prec = 0.8238765080870344 - recl = 0.23158863755917938
    2019 acc = 0.8422243220800045 - f1 = 0.8592664487127095 - prec = 0.8244779923536908 - recl = 0.23158863755917938
    2020 acc = 0.8185085867518685 - f1 = 0.8388632794691251 - prec = 0.7936736161035226 - recl = 0.23158863755917938
    2021 acc = 0.5313358302122347 - f1 = 0.5321302158632035 - prec = 0.5773474686282994 - recl = 0.49348127600554786

    MLPClassifier (pipeline):
    Wall time: 4min 28s
    2017 acc = 0.5372714486638537 - f1 = 0.3888215814554559 - prec = 0.5038727234666108 - recl = 0.3165439242503945
    2018 acc = 0.7880467560814498 - f1 = 0.7951593205284778 - prec = 0.7878554534954073 - recl = 0.3165439242503945
    2019 acc = 0.7536580477049509 - f1 = 0.7518675550171614 - prec = 0.8186671691476666 - recl = 0.3165439242503945
    2020 acc = 0.7772008900553432 - f1 = 0.7776069252235321 - prec = 0.8274650021210835 - recl = 0.3165439242503945
    2021 acc = 0.5216479400749063 - f1 = 0.5121963640067221 - prec = 0.5700521423713444 - recl = 0.46500231160425337
    ```
</details>
    
### Data: 20210730b
Using `X, y = load_split_data(suffix='20210730b')`


<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 3min 35s
    2017 acc = 0.6685011924417538 - f1 = 0.03386205667438959 - prec = 0.536723163841808 - recl = 0.017482517482517484
    2018 acc = 0.6683419971854447 - f1 = 0.11820403176542456 - prec = 0.5122435473196558 - recl = 0.017482517482517484
    2019 acc = 0.7326976491137646 - f1 = 0.07123669286638146 - prec = 0.5976627712854758 - recl = 0.017482517482517484
    2020 acc = 0.7405431619786615 - f1 = 0.07411177847907971 - prec = 0.558282208588957 - recl = 0.017482517482517484
    2021 acc = 0.6712609238451935 - f1 = 0.13574898253905737 - prec = 0.45430579964850615 - recl = 0.07979626485568761

    Naive Bayes:
    Wall time: 821 ms
    2017 acc = 0.624961780713019 - f1 = 0.2723929291730929 - prec = 0.38330550918196993 - recl = 0.21126242178873758
    2018 acc = 0.5395330135845372 - f1 = 0.39550578742977793 - prec = 0.3511179542107377 - recl = 0.21126242178873758
    2019 acc = 0.69701915642986 - f1 = 0.2410157090596083 - prec = 0.3742481621742036 - recl = 0.21126242178873758
    2020 acc = 0.6666571575283847 - f1 = 0.3122020130672788 - prec = 0.3392171910974674 - recl = 0.21126242178873758
    2021 acc = 0.5188014981273408 - f1 = 0.4440983039113881 - prec = 0.3545831414094887 - recl = 0.594073159438185

    Random Forest:
    Wall time: 13.6 s
    2017 acc = 0.6044762428912126 - f1 = 0.2261306532663317 - prec = 0.323187414500684 - recl = 0.17390504232609497
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.17390504232609497
    2019 acc = 0.9999713655756951 - f1 = 0.9999470983441782 - prec = 1.0 - recl = 0.17390504232609497
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.17390504232609497
    2021 acc = 0.6745068664169788 - f1 = 0.08634707036725539 - prec = 0.47022900763358777 - recl = 0.04753820033955857

    AdaBoost:
    Wall time: 2min 27s
    2017 acc = 0.6644652357365621 - f1 = 0.08228800802809833 - prec = 0.45137614678899085 - recl = 0.04527051895472948
    2018 acc = 0.6721904707200091 - f1 = 0.2464016902152384 - prec = 0.5240101095197978 - recl = 0.04527051895472948
    2019 acc = 0.7350170374824614 - f1 = 0.15349432857665568 - prec = 0.5668918918918919 - recl = 0.04527051895472948
    2020 acc = 0.7413704570091858 - f1 = 0.1419647927314026 - prec = 0.5376344086021505 - recl = 0.04527051895472948
    2021 acc = 0.6722097378277153 - f1 = 0.2544298046342572 - prec = 0.4817204301075269 - recl = 0.17286618305294027

    Gradient Boost:
    Wall time: 13min 4s
    2017 acc = 0.6676450804133798 - f1 = 0.026509045316138274 - prec = 0.4966442953020134 - recl = 0.013617960986382039
    2018 acc = 0.6799735776443896 - f1 = 0.15781120096742499 - prec = 0.6342648845686513 - recl = 0.013617960986382039
    2019 acc = 0.7362769521518769 - f1 = 0.10198907956318251 - prec = 0.650497512437811 - recl = 0.013617960986382039
    2020 acc = 0.7445370000570548 - f1 = 0.0999095386471002 - prec = 0.6388174807197944 - recl = 0.013617960986382039
    2021 acc = 0.6775530586766542 - f1 = 0.1273145019597243 - prec = 0.5119565217391304 - recl = 0.07269640376601327

    XGBoost:
    [12:19:04] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 43.7 s
    2017 acc = 0.6447135082247906 - f1 = 0.20779929097354788 - prec = 0.4010526315789474 - recl = 0.1402281928597718
    2018 acc = 0.8033832103162066 - f1 = 0.6139183397247914 - prec = 0.8854725882544331 - recl = 0.1402281928597718
    2019 acc = 0.7970964693754832 - f1 = 0.4470973782771535 - prec = 0.8516646848989299 - recl = 0.1402281928597718
    2020 acc = 0.7980544303075255 - f1 = 0.41722235943031194 - prec = 0.8514784946236559 - recl = 0.1402281928597718
    2021 acc = 0.668414481897628 - f1 = 0.21531552824391398 - prec = 0.45940494200706 - recl = 0.14060811853681124

    MLPClassifier (pipeline):
    Wall time: 4min 53s
    2017 acc = 0.6067388246804868 - f1 = 0.3188221586696325 - prec = 0.3755927127526828 - recl = 0.27695988222304013
    2018 acc = 0.813435193428875 - f1 = 0.7035955466325973 - prec = 0.7462975510599168 - recl = 0.27695988222304013
    2019 acc = 0.8360106520058415 - f1 = 0.6416369438708467 - prec = 0.7852657374789401 - recl = 0.27695988222304013
    2020 acc = 0.8451246648028755 - f1 = 0.6439766542068333 - prec = 0.8078315235274761 - recl = 0.27695988222304013
    2021 acc = 0.6009488139825219 - f1 = 0.3394229974373812 - prec = 0.3654325382698469 - recl = 0.3168698873282914
    ```
</details>

### Data: 20210730a
Using `X, y = load_split_data(suffix='20210730a')`


<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 1min 53s
    2017 acc = 0.8854644407753929 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.7326459691547719 - f1 = 0.0008586454867446603 - prec = 0.5714285714285714 - recl = 0.0
    2019 acc = 0.6961028548521032 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2020 acc = 0.7064529012380898 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2021 acc = 0.736629213483146 - f1 = 0.0011363636363636363 - prec = 0.5 - recl = 0.0005688282138794084

    Naive Bayes:
    Wall time: 792 ms
    2017 acc = 0.16944903075888215 - f1 = 0.20933752474094772 - prec = 0.1174549255291351 - recl = 0.9614973262032086
    2018 acc = 0.4405927798041299 - f1 = 0.4388037340094503 - prec = 0.2998267580124419 - recl = 0.9614973262032086
    2019 acc = 0.3696990522005555 - f1 = 0.46686688626235223 - prec = 0.31419722901385494 - recl = 0.9614973262032086
    2020 acc = 0.4098248416728476 - f1 = 0.4423719676549866 - prec = 0.3060798209623275 - recl = 0.9614973262032086
    2021 acc = 0.5764294631710362 - f1 = 0.4066871852266369 - prec = 0.32221236976280204 - recl = 0.5511945392491467

    Random Forest:
    Wall time: 12.9 s
    2017 acc = 0.666605515807497 - f1 = 0.19705449189985272 - prec = 0.13597560975609757 - recl = 0.35775401069518714
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.35775401069518714
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.35775401069518714
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.35775401069518714
    2021 acc = 0.7340324594257178 - f1 = 0.04105149441843716 - prec = 0.40714285714285714 - recl = 0.02161547212741752

    AdaBoost:
    Wall time: 2min 27s
    2017 acc = 0.8856478933529016 - f1 = 0.003198294243070363 - prec = 0.5 - recl = 0.0016042780748663102
    2018 acc = 0.733967086935294 - f1 = 0.01926945473795659 - prec = 0.674074074074074 - recl = 0.0016042780748663102
    2019 acc = 0.6967328121868109 - f1 = 0.008240471954302837 - prec = 0.6666666666666666 - recl = 0.0016042780748663102
    2020 acc = 0.7067096479717008 - f1 = 0.006762631629794223 - prec = 0.5737704918032787 - recl = 0.0016042780748663102
    2021 acc = 0.7362297128589264 - f1 = 0.00037850113550340656 - prec = 0.1 - recl = 0.00018960940462646946

    Gradient Boost:
    Wall time: 14min 1s
    2017 acc = 0.8671803338836911 - f1 = 0.09348914858096828 - prec = 0.21292775665399238 - recl = 0.059893048128342244
    2018 acc = 0.7341968465492978 - f1 = 0.01237861487568029 - prec = 0.9508196721311475 - recl = 0.059893048128342244
    2019 acc = 0.6973914039458237 - f1 = 0.008816357156255861 - prec = 0.9591836734693877 - recl = 0.059893048128342244
    2020 acc = 0.7073657785131512 - f1 = 0.0062003487696182915 - prec = 1.0 - recl = 0.059893048128342244
    2021 acc = 0.7365293383270911 - f1 = 0.000378931413414172 - prec = 0.25 - recl = 0.00018960940462646946

    XGBoost:
    [09:44:27] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 2min 21s
    2017 acc = 0.7822417904971565 - f1 = 0.13546977421704298 - prec = 0.12405513561582926 - recl = 0.14919786096256685
    2018 acc = 0.8105631982538269 - f1 = 0.4695190606401801 - prec = 0.9343790012804097 - recl = 0.14919786096256685
    2019 acc = 0.7961515333734215 - f1 = 0.5068920135762278 - prec = 0.9568514644351465 - recl = 0.14919786096256685
    2020 acc = 0.7786557882124722 - f1 = 0.4100965559188018 - prec = 0.9420188613342647 - recl = 0.14919786096256685
    2021 acc = 0.7278401997503121 - f1 = 0.04553415061295971 - prec = 0.2981651376146789 - recl = 0.02464922260144103

    MLPClassifier (pipeline):
    Wall time: 17min 26s
    2017 acc = 0.8529321836971809 - f1 = 0.07535563244905806 - prec = 0.13406292749658003 - recl = 0.052406417112299465
    2018 acc = 0.8333668399437089 - f1 = 0.6436117936117935 - prec = 0.7516499282639886 - recl = 0.052406417112299465
    2019 acc = 0.8032242361767317 - f1 = 0.621544222932041 - prec = 0.7479125248508947 - recl = 0.052406417112299465
    2020 acc = 0.8167113596165916 - f1 = 0.6313615238969533 - prec = 0.770696175935005 - recl = 0.052406417112299465
    2021 acc = 0.6687640449438202 - f1 = 0.2425488180883864 - prec = 0.3049095607235142 - recl = 0.20136518771331058
    ```
</details>

### Data: 20210729a
Using `X, y = load_split_data(suffix='20210729a')`

<details>
    <summary><b>Click to show/hide details</b></summary>
    
    ```
    Logistic Regression:
    Wall time: 2min 47s
    2017 acc = 0.8885831345930411 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.7645251155978058 - f1 = 0.0002438726984514084 - prec = 1.0 - recl = 0.0
    2019 acc = 0.7318386163846176 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2020 acc = 0.7278769897871855 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2021 acc = 0.7668913857677903 - f1 = 0.0 - prec = 0.0 - recl = 0.0

    Naive Bayes:
    Wall time: 847 ms
    2017 acc = 0.15880878126337675 - f1 = 0.1986484912035419 - prec = 0.11111835245046923 - recl = 0.9357848518111965
    2018 acc = 0.4045205204055257 - f1 = 0.3781416831623778 - prec = 0.2507358205393366 - recl = 0.9357848518111965
    2019 acc = 0.34206683274632765 - f1 = 0.42423634951261685 - prec = 0.2771593215899417 - recl = 0.9357848518111965
    2020 acc = 0.39564671649455124 - f1 = 0.413726304137263 - prec = 0.28105730184990224 - recl = 0.9357848518111965
    2021 acc = 0.5673408239700375 - f1 = 0.3656465075413677 - prec = 0.2777530589543938 - recl = 0.5349185946872322

    Random Forest:
    Wall time: 15 s
    2017 acc = 0.8019323671497585 - f1 = 0.0970170058544745 - prec = 0.09858356940509914 - recl = 0.09549945115257959
    2018 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.09549945115257959
    2019 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.09549945115257959
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.09549945115257959
    2021 acc = 0.7661922596754057 - f1 = 0.008470986869970352 - prec = 0.37037037037037035 - recl = 0.004284490145672665

    AdaBoost:
    Wall time: 2min 31s
    2017 acc = 0.8885831345930411 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2018 acc = 0.7644676756943048 - f1 = 0.0 - prec = 0.0 - recl = 0.0
    2019 acc = 0.7343870801477537 - f1 = 0.028080469404861697 - prec = 0.7486033519553073 - recl = 0.0
    2020 acc = 0.7280196268614139 - f1 = 0.0025109855618330196 - prec = 0.631578947368421 - recl = 0.0
    2021 acc = 0.7668913857677903 - f1 = 0.0 - prec = 0.0 - recl = 0.0

    Gradient Boost:
    Wall time: 12min 59s
    2017 acc = 0.8588026661774598 - f1 = 0.058703628210354665 - prec = 0.11410459587955626 - recl = 0.03951701427003293
    2018 acc = 0.7646974353083087 - f1 = 0.0017058608504934814 - prec = 1.0 - recl = 0.03951701427003293
    2019 acc = 0.7395985453712454 - f1 = 0.05859213250517598 - prec = 0.9593220338983051 - recl = 0.03951701427003293
    2020 acc = 0.7289039767216294 - f1 = 0.007726845567505482 - prec = 0.9736842105263158 - recl = 0.03951701427003293
    2021 acc = 0.7668414481897627 - f1 = 0.0 - prec = 0.0 - recl = 0.0

    XGBoost:
    [16:26:34] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 43.5 s
    2017 acc = 0.5707209686296092 - f1 = 0.17624970664163345 - prec = 0.11208955223880597 - recl = 0.4121844127332602
    2018 acc = 0.8110514374335851 - f1 = 0.3369948604252746 - prec = 0.9704004643064422 - recl = 0.4121844127332602
    2019 acc = 0.8007330412622055 - f1 = 0.41858133511571555 - prec = 0.9619815668202765 - recl = 0.4121844127332602
    2020 acc = 0.7901808638101215 - f1 = 0.3836420011732171 - prec = 0.956140350877193 - recl = 0.4121844127332602
    2021 acc = 0.7585518102372035 - f1 = 0.03627665935818218 - prec = 0.2607449856733524 - recl = 0.019494430162810626

    MLPClassifier (pipeline):
    Wall time: 6min 5s
    2017 acc = 0.7113679447196233 - f1 = 0.1948140566359604 - prec = 0.14133663366336632 - recl = 0.31339187705817784
    2018 acc = 0.8345443579654787 - f1 = 0.643038602143875 - prec = 0.6536087668472099 - recl = 0.31339187705817784
    2019 acc = 0.8238696561005641 - f1 = 0.5983151570560961 - prec = 0.7701748486886348 - recl = 0.31339187705817784
    2020 acc = 0.8278370514064015 - f1 = 0.6158742282477245 - prec = 0.7838626053143227 - recl = 0.31339187705817784
    2021 acc = 0.6543320848938826 - f1 = 0.22224719101123597 - prec = 0.23369565217391305 - recl = 0.21186803770351328
    ```
</details>


### Data: 20210715
Using `X, y = load_split_data(suffix='20210715')`

<details>
    <summary><b>Click to show/hide details</b></summary>

    ```
    Logistic Regression:
    Wall time: 7.98 s
    2017 acc = 0.8506231671554252 - f1 = 0.012121212121212123 - prec = 0.6521739130434783 - recl = 0.006117455138662317
    2018 acc = 0.8413509865303427 - f1 = 0.017081850533807827 - prec = 0.5217391304347826 - recl = 0.006117455138662317
    2019 acc = 0.8123299831056897 - f1 = 0.03447259870359458 - prec = 0.5416666666666666 - recl = 0.006117455138662317
    2020 acc = 0.8076396416956695 - f1 = 0.04988023108355644 - prec = 0.5145348837209303 - recl = 0.006117455138662317
    2021 acc = 0.8172492836676217 - f1 = 0.02207911683532659 - prec = 0.5454545454545454 - recl = 0.011267605633802818

    Random Forest:
    Wall time: 57.6 s
    2017 acc = 0.8464687194525904 - f1 = 0.022559315441462464 - prec = 0.24369747899159663 - recl = 0.011827079934747145
    2018 acc = 0.9999712800482495 - f1 = 0.9999095431931254 - prec = 1.0 - recl = 0.011827079934747145
    2019 acc = 0.9999713655756951 - f1 = 0.9999239138705014 - prec = 1.0 - recl = 0.011827079934747145
    2020 acc = 1.0 - f1 = 1.0 - prec = 1.0 - recl = 0.011827079934747145
    2021 acc = 0.8149570200573066 - f1 = 0.07610872675250357 - prec = 0.44333333333333336 - recl = 0.04162754303599374

    Gradient Boost:
    Wall time: 1min 50s
    2017 acc = 0.8500733137829912 - f1 = 0.009685230024213074 - prec = 0.46153846153846156 - recl = 0.004893964110929853
    2018 acc = 0.8420689853241047 - f1 = 0.028273546562996993 - prec = 0.6106870229007634 - recl = 0.004893964110929853
    2019 acc = 0.8144489305042523 - f1 = 0.0553935860058309 - prec = 0.6597222222222222 - recl = 0.004893964110929853
    2020 acc = 0.8100644719575512 - f1 = 0.06409895979758223 - prec = 0.631578947368421 - recl = 0.004893964110929853
    2021 acc = 0.8174212034383954 - f1 = 0.05291319857312723 - prec = 0.5266272189349113 - recl = 0.02785602503912363

    Naive Bayes:
    Wall time: 111 ms
    2017 acc = 0.7651515151515151 - f1 = 0.22374798061389334 - prec = 0.2216 - recl = 0.22593800978792822
    2018 acc = 0.7337947672247911 - f1 = 0.2773836438761986 - prec = 0.24373201808466913 - recl = 0.22593800978792822
    2019 acc = 0.7229333104257939 - f1 = 0.303082685105157 - prec = 0.28774617067833697 - recl = 0.22593800978792822
    2020 acc = 0.7000342328978147 - f1 = 0.3202534100458983 - prec = 0.28418999541073886 - recl = 0.22593800978792822
    2021 acc = 0.6636676217765043 - f1 = 0.3274894007104389 - prec = 0.25831525668835864 - recl = 0.4472613458528952

    XGBoost:
    [13:39:00] WARNING: ..\src\learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
    Wall time: 5.2 s
    2017 acc = 0.8421309872922776 - f1 = 0.07780157030692363 - prec = 0.31142857142857144 - recl = 0.04445350734094616
    2018 acc = 0.8607082340101668 - f1 = 0.24384159650763954 - prec = 0.8826185101580135 - recl = 0.04445350734094616
    2019 acc = 0.8320304670274604 - f1 = 0.2385773624091381 - prec = 0.8118374558303887 - recl = 0.04445350734094616
    2020 acc = 0.8276944143321732 - f1 = 0.23852748361069087 - prec = 0.8023748939779474 - recl = 0.04445350734094616
    2021 acc = 0.8130085959885387 - f1 = 0.13287270794578795 - prec = 0.44014084507042256 - recl = 0.0782472613458529
    ```
</details>

## Cross Validation

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
import xgboost as xgb

from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

def print_scores(clf,X,y):
    for yr in X.index.year.unique():
        year = str(yr)
        print(f"  {year} acc = {clf.score(X.loc[year], y.loc[year])} - " + \
              f"f1 = {f1_score(y.loc[year],clf.predict(X.loc[year]))} - " + \
              f"prec = {precision_score(y.loc[year],clf.predict(X.loc[year]), zero_division=0)} - " + \
              f"recl = {recall_score(y.loc[year],clf.predict(X.loc[year]), zero_division=0)}"
             )

clfs = {
        'Logistic Regression': LogisticRegression(random_state=0, max_iter=1000), # ~54% prec
        'Random Forest': RandomForestClassifier(random_state=0, max_depth=3), # 0% prec
        #'Gradient Boost': GradientBoostingClassifier(random_state=0, learning_rate=0.1), # ~32% prec
        #'LinearSVC': make_pipeline(StandardScaler(), LinearSVC(random_state=0, tol=1e-5)),  # ~48% prec
        #'SVC' : make_pipeline(StandardScaler(), SVC(gamma='auto', random_state=0)), # never came back ~20min
        #'AdaBoost': make_pipeline(StandardScaler(), AdaBoostClassifier(n_estimators=100, random_state=0)), # ~32%
        'XGBoost' : xgb.XGBClassifier(n_estimators=20, random_state=42, learning_rate=0.1, n_jobs=-1, gamma=0.9, max_depth=4)
       }


for k,clf in clfs.items():
    scores = cross_val_score(clf, X, y, scoring='precision', cv=5, n_jobs=-1)
    print("%s: precision: %0.2f (+/- %0.2f)" % (k, scores.mean(), scores.std()))

In [None]:
scores

# GradientBoost Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV

parameters = {'learning_rate': [0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1],
              'loss': ['deviance'],
              'n_estimators': [100],
              'criterion': ['friedman_mse'],
              'max_depth' : [3],
              'random_state': [0],
              'max_features' : [None]
             }

gbc = GradientBoostingClassifier()
clf = GridSearchCV(gbc, parameters, n_jobs=-1, verbose=4, scoring='precision').fit(X, y)

In [None]:
pd.DataFrame(clf.cv_results_)

In [None]:
from sklearn.model_selection import GridSearchCV

parameters = {'learning_rate': [0.03, 0.06, 0.1],
              'loss': ['deviance', 'exponential'],
              'n_estimators': [100, 500, 1000],
              'criterion': ['friedman_mse', 'mse', 'mae'],
              'max_depth' : [3,4,5],
              'random_state': [0],
              'max_features' : [None, 'sqrt', 'log2']
             }

gbc = GradientBoostingClassifier()
clf = GridSearchCV(gbc, parameters, n_jobs=-1, verbose=4, scoring='precision').fit(X, y)

In [None]:
pd.DataFrame(clf.cv_results_).to_csv('grid_search_output.csv')

# XGBoost Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV
import xgboost as xgb

In [None]:
parameters = {'learning_rate': [0.1],
              'n_estimators': [19],
              'gamma': [0.9],
              'max_depth' : [4],
              'booster' :['gblinear'],
             }

In [None]:
#xgb.XGBClassifier(n_estimators=20, random_state=42, learning_rate=0.1, n_jobs=-1, gamma=0.9, max_depth=4)
gbc = xgb.XGBClassifier(n_jobs=-1, random_state=42)
clf = GridSearchCV(gbc, parameters, verbose=4, scoring='precision').fit(X, y, eval_metric=['map'])
#pd.DataFrame(clf.cv_results_)#.to_csv('grid_search_output.csv')

In [None]:
pd.DataFrame(clf.cv_results_)