In [1]:
import os
import re
import joblib
import scipy
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import xgboost
import lightgbm
from catboost import CatBoostClassifier
from sklearn import ensemble
from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import SelectPercentile
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

%matplotlib inline

In [2]:
features = joblib.load("features.gz")
labels = joblib.load("labels.gz")
x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=0.3, random_state=417)

In [3]:
# xgboost
xgb_model = xgboost.XGBClassifier(n_jobs=multiprocessing.cpu_count(), random_state=2020)

# Hyperparameters
n_estimators = [int(x) for x in np.linspace(100, 1000, 100)]
max_depth = [int(x) for x in np.linspace(5, 15, 10)]
gamma = [float(x) for x in np.linspace(0, 1, 10)]
learning_rate = [0.1, 0.01, 0.001]
param_grid = dict(
    n_estimators=n_estimators, 
    max_depth=max_depth, 
    gamma=gamma, 
    learning_rate=learning_rate)

# Random search
rand_search = RandomizedSearchCV(
    xgb_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
xgb_model = xgboost.XGBClassifier(**rand_result.best_params_)
xgb_model.fit(features, labels)

# Evaluate step
y_pred_xgb = xgb_model.predict(x_valid)
y_prob_xgb = xgb_model.predict_proba(x_valid)
accuracy = metrics.accuracy_score(y_valid, y_pred_xgb)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_xgb[:, 1])
print("Accuracy: {:.4f}".format(accuracy))
print("ROC AUC: {:.4f}".format(roc_auc_score))
print("Confusion matrix: \n{}".format(metrics.confusion_matrix(y_valid, y_pred_xgb)))

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    2.6s
[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    5.2s finished


{'n_estimators': 809, 'max_depth': 7, 'learning_rate': 0.1, 'gamma': 0.2222222222222222}
Accuracy: 1.0000
ROC AUC: 1.0000
Confusion matrix: 
[[30  0]
 [ 0  4]]


In [4]:
# LGB
lgb_model = lightgbm.LGBMClassifier(n_jobs=multiprocessing.cpu_count(), random_state=417)

# Hyperparameters
boosting_type = ["gbdt", "dart", "goss", "rf"]
n_estimators = [int(x) for x in np.linspace(100, 1000, 100)]
max_depth = [int(x) for x in np.linspace(5, 15, 10)]
num_leaves = [int(x) for x in np.linspace(1, 50, 10)]
learning_rate = [float(x) for x in np.linspace(0.001, 0.1, 10)]
feature_fraction = [float(x) for x in np.linspace(0, 1, 10)]
min_data_in_leaf = [int(x) for x in np.linspace(1, 50, 10)]
param_grid = dict(
    boosting_type=boosting_type, 
    n_estimators=n_estimators, 
    max_depth=max_depth, 
    num_leaves=num_leaves, 
    learning_rate=learning_rate, 
    feature_fraction=feature_fraction, 
    min_data_in_leaf=min_data_in_leaf)

# Random search
rand_search = RandomizedSearchCV(
    lgb_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
lgb_model = lightgbm.LGBMClassifier(**rand_result.best_params_)
lgb_model.fit(features, labels)

# Evaluate step
y_pred_lgb = lgb_model.predict(x_valid)
y_prob_lgb = lgb_model.predict_proba(x_valid)
accuracy = metrics.accuracy_score(y_valid, y_pred_lgb)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_lgb[:, 1])
print("Accuracy: {:.4f}".format(accuracy))
print("ROC AUC: {:.4f}".format(roc_auc_score))
print("Confusion matrix: \n{}".format(metrics.confusion_matrix(y_valid, y_pred_lgb)))

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.5s


{'num_leaves': 39, 'n_estimators': 172, 'min_data_in_leaf': 11, 'max_depth': 13, 'learning_rate': 0.012, 'feature_fraction': 0.6666666666666666, 'boosting_type': 'gbdt'}
Accuracy: 0.9412
ROC AUC: 1.0000
Confusion matrix: 
[[30  0]
 [ 2  2]]


[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    1.1s finished


In [5]:
# CatBoost
cat_model = CatBoostClassifier(random_state=417)

# Hyperparameters
depth = [int(x) for x in np.linspace(3, 36, 12)]
learning_rate = [float(x) for x in np.linspace(0.001, 0.1, 10)]
param_grid = dict(
    depth=depth, 
    learning_rate=learning_rate)

# Random search
rand_search = RandomizedSearchCV(
    cat_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
cat_model = CatBoostClassifier(**rand_result.best_params_)
cat_model.fit(features, labels)

# Evaluate step
y_prob_cat = cat_model.predict_proba(x_valid)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_cat[:, 1])
print("ROC AUC: {:.4f}".format(roc_auc_score))

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.


Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   59.6s
[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:  1.8min finished


0:	learn: 0.6631103	total: 56.1ms	remaining: 56.1s
1:	learn: 0.6311843	total: 65.6ms	remaining: 32.7s
2:	learn: 0.6021128	total: 74.5ms	remaining: 24.8s
3:	learn: 0.5735652	total: 76.7ms	remaining: 19.1s
4:	learn: 0.5503344	total: 77.7ms	remaining: 15.5s
5:	learn: 0.5259575	total: 94.4ms	remaining: 15.6s
6:	learn: 0.5069744	total: 95.3ms	remaining: 13.5s
7:	learn: 0.4863525	total: 111ms	remaining: 13.8s
8:	learn: 0.4636498	total: 113ms	remaining: 12.5s
9:	learn: 0.4398886	total: 130ms	remaining: 12.9s
10:	learn: 0.4247178	total: 148ms	remaining: 13.3s
11:	learn: 0.4093896	total: 165ms	remaining: 13.6s
12:	learn: 0.3927320	total: 182ms	remaining: 13.8s
13:	learn: 0.3743382	total: 186ms	remaining: 13.1s
14:	learn: 0.3584354	total: 189ms	remaining: 12.4s
15:	learn: 0.3386648	total: 197ms	remaining: 12.1s
16:	learn: 0.3266236	total: 214ms	remaining: 12.4s
17:	learn: 0.3168239	total: 216ms	remaining: 11.8s
18:	learn: 0.3077371	total: 232ms	remaining: 12s
19:	learn: 0.2989132	total: 249ms	re

164:	learn: 0.0309922	total: 1.93s	remaining: 9.76s
165:	learn: 0.0307826	total: 1.94s	remaining: 9.77s
166:	learn: 0.0305651	total: 1.96s	remaining: 9.78s
167:	learn: 0.0302403	total: 1.96s	remaining: 9.73s
168:	learn: 0.0300506	total: 1.98s	remaining: 9.74s
169:	learn: 0.0298145	total: 1.99s	remaining: 9.69s
170:	learn: 0.0295890	total: 2s	remaining: 9.71s
171:	learn: 0.0293405	total: 2.02s	remaining: 9.72s
172:	learn: 0.0290328	total: 2.02s	remaining: 9.66s
173:	learn: 0.0287728	total: 2.04s	remaining: 9.68s
174:	learn: 0.0285625	total: 2.05s	remaining: 9.65s
175:	learn: 0.0283653	total: 2.06s	remaining: 9.67s
176:	learn: 0.0282037	total: 2.08s	remaining: 9.68s
177:	learn: 0.0279993	total: 2.08s	remaining: 9.63s
178:	learn: 0.0278039	total: 2.09s	remaining: 9.61s
179:	learn: 0.0273699	total: 2.1s	remaining: 9.55s
180:	learn: 0.0272279	total: 2.11s	remaining: 9.56s
181:	learn: 0.0270473	total: 2.13s	remaining: 9.57s
182:	learn: 0.0268815	total: 2.15s	remaining: 9.58s
183:	learn: 0.02

331:	learn: 0.0143218	total: 4.53s	remaining: 9.11s
332:	learn: 0.0142847	total: 4.55s	remaining: 9.11s
333:	learn: 0.0142350	total: 4.56s	remaining: 9.1s
334:	learn: 0.0141789	total: 4.57s	remaining: 9.08s
335:	learn: 0.0141300	total: 4.59s	remaining: 9.07s
336:	learn: 0.0140815	total: 4.61s	remaining: 9.07s
337:	learn: 0.0140218	total: 4.61s	remaining: 9.04s
338:	learn: 0.0139741	total: 4.63s	remaining: 9.03s
339:	learn: 0.0139267	total: 4.65s	remaining: 9.03s
340:	learn: 0.0138795	total: 4.67s	remaining: 9.02s
341:	learn: 0.0138259	total: 4.68s	remaining: 9s
342:	learn: 0.0137796	total: 4.69s	remaining: 8.99s
343:	learn: 0.0137454	total: 4.71s	remaining: 8.98s
344:	learn: 0.0136996	total: 4.73s	remaining: 8.97s
345:	learn: 0.0136525	total: 4.74s	remaining: 8.95s
346:	learn: 0.0136189	total: 4.75s	remaining: 8.95s
347:	learn: 0.0135723	total: 4.76s	remaining: 8.93s
348:	learn: 0.0135277	total: 4.78s	remaining: 8.92s
349:	learn: 0.0134834	total: 4.8s	remaining: 8.91s
350:	learn: 0.013

501:	learn: 0.0091805	total: 6.91s	remaining: 6.86s
502:	learn: 0.0091604	total: 6.93s	remaining: 6.84s
503:	learn: 0.0091404	total: 6.94s	remaining: 6.83s
504:	learn: 0.0091205	total: 6.96s	remaining: 6.82s
505:	learn: 0.0091007	total: 6.98s	remaining: 6.81s
506:	learn: 0.0090809	total: 6.99s	remaining: 6.8s
507:	learn: 0.0090613	total: 7.01s	remaining: 6.79s
508:	learn: 0.0090418	total: 7.03s	remaining: 6.78s
509:	learn: 0.0090223	total: 7.05s	remaining: 6.77s
510:	learn: 0.0090029	total: 7.06s	remaining: 6.76s
511:	learn: 0.0089837	total: 7.08s	remaining: 6.75s
512:	learn: 0.0089644	total: 7.1s	remaining: 6.74s
513:	learn: 0.0089453	total: 7.12s	remaining: 6.73s
514:	learn: 0.0089263	total: 7.13s	remaining: 6.72s
515:	learn: 0.0088966	total: 7.13s	remaining: 6.69s
516:	learn: 0.0088778	total: 7.15s	remaining: 6.68s
517:	learn: 0.0088591	total: 7.17s	remaining: 6.67s
518:	learn: 0.0088298	total: 7.17s	remaining: 6.65s
519:	learn: 0.0088107	total: 7.19s	remaining: 6.64s
520:	learn: 0.

663:	learn: 0.0068205	total: 9.13s	remaining: 4.62s
664:	learn: 0.0068127	total: 9.15s	remaining: 4.61s
665:	learn: 0.0068049	total: 9.17s	remaining: 4.6s
666:	learn: 0.0067972	total: 9.19s	remaining: 4.58s
667:	learn: 0.0067895	total: 9.2s	remaining: 4.57s
668:	learn: 0.0067724	total: 9.21s	remaining: 4.55s
669:	learn: 0.0067648	total: 9.22s	remaining: 4.54s
670:	learn: 0.0067571	total: 9.24s	remaining: 4.53s
671:	learn: 0.0067495	total: 9.26s	remaining: 4.52s
672:	learn: 0.0067419	total: 9.27s	remaining: 4.5s
673:	learn: 0.0067343	total: 9.29s	remaining: 4.49s
674:	learn: 0.0067267	total: 9.31s	remaining: 4.48s
675:	learn: 0.0067191	total: 9.32s	remaining: 4.47s
676:	learn: 0.0067116	total: 9.34s	remaining: 4.46s
677:	learn: 0.0066949	total: 9.34s	remaining: 4.44s
678:	learn: 0.0066875	total: 9.36s	remaining: 4.42s
679:	learn: 0.0066800	total: 9.37s	remaining: 4.41s
680:	learn: 0.0066725	total: 9.39s	remaining: 4.4s
681:	learn: 0.0066651	total: 9.41s	remaining: 4.39s
682:	learn: 0.00

829:	learn: 0.0055916	total: 11.6s	remaining: 2.37s
830:	learn: 0.0055865	total: 11.6s	remaining: 2.35s
831:	learn: 0.0055814	total: 11.6s	remaining: 2.34s
832:	learn: 0.0055762	total: 11.6s	remaining: 2.33s
833:	learn: 0.0055690	total: 11.6s	remaining: 2.31s
834:	learn: 0.0055639	total: 11.6s	remaining: 2.3s
835:	learn: 0.0055588	total: 11.6s	remaining: 2.28s
836:	learn: 0.0055537	total: 11.7s	remaining: 2.27s
837:	learn: 0.0055487	total: 11.7s	remaining: 2.26s
838:	learn: 0.0055436	total: 11.7s	remaining: 2.24s
839:	learn: 0.0055385	total: 11.7s	remaining: 2.23s
840:	learn: 0.0055335	total: 11.7s	remaining: 2.22s
841:	learn: 0.0055264	total: 11.7s	remaining: 2.2s
842:	learn: 0.0055214	total: 11.8s	remaining: 2.19s
843:	learn: 0.0055164	total: 11.8s	remaining: 2.17s
844:	learn: 0.0055114	total: 11.8s	remaining: 2.16s
845:	learn: 0.0055064	total: 11.8s	remaining: 2.15s
846:	learn: 0.0055014	total: 11.8s	remaining: 2.13s
847:	learn: 0.0054964	total: 11.8s	remaining: 2.12s
848:	learn: 0.

993:	learn: 0.0047793	total: 14s	remaining: 84.4ms
994:	learn: 0.0047741	total: 14s	remaining: 70.3ms
995:	learn: 0.0047687	total: 14s	remaining: 56.3ms
996:	learn: 0.0047635	total: 14s	remaining: 42.2ms
997:	learn: 0.0047582	total: 14s	remaining: 28.1ms
998:	learn: 0.0047530	total: 14s	remaining: 14.1ms
999:	learn: 0.0047477	total: 14.1s	remaining: 0us
{'learning_rate': 0.034, 'depth': 12}
0:	learn: 0.6510419	total: 2.93ms	remaining: 2.93s
1:	learn: 0.6230358	total: 5.02ms	remaining: 2.5s
2:	learn: 0.5940834	total: 27.2ms	remaining: 9.03s
3:	learn: 0.5689751	total: 49.7ms	remaining: 12.4s
4:	learn: 0.5484372	total: 70.4ms	remaining: 14s
5:	learn: 0.5255679	total: 72.2ms	remaining: 12s
6:	learn: 0.5036298	total: 93.8ms	remaining: 13.3s
7:	learn: 0.4825145	total: 117ms	remaining: 14.5s
8:	learn: 0.4644286	total: 140ms	remaining: 15.4s
9:	learn: 0.4474000	total: 161ms	remaining: 16s
10:	learn: 0.4270299	total: 183ms	remaining: 16.5s
11:	learn: 0.4139077	total: 191ms	remaining: 15.7s
12:	

161:	learn: 0.0302117	total: 2.88s	remaining: 14.9s
162:	learn: 0.0299771	total: 2.9s	remaining: 14.9s
163:	learn: 0.0296746	total: 2.91s	remaining: 14.8s
164:	learn: 0.0294402	total: 2.93s	remaining: 14.8s
165:	learn: 0.0292237	total: 2.95s	remaining: 14.8s
166:	learn: 0.0289357	total: 2.96s	remaining: 14.8s
167:	learn: 0.0286928	total: 2.98s	remaining: 14.8s
168:	learn: 0.0283333	total: 2.99s	remaining: 14.7s
169:	learn: 0.0281420	total: 3.01s	remaining: 14.7s
170:	learn: 0.0279028	total: 3.03s	remaining: 14.7s
171:	learn: 0.0276302	total: 3.05s	remaining: 14.7s
172:	learn: 0.0273785	total: 3.06s	remaining: 14.7s
173:	learn: 0.0270099	total: 3.09s	remaining: 14.7s
174:	learn: 0.0267759	total: 3.11s	remaining: 14.7s
175:	learn: 0.0265004	total: 3.13s	remaining: 14.7s
176:	learn: 0.0262597	total: 3.16s	remaining: 14.7s
177:	learn: 0.0260542	total: 3.18s	remaining: 14.7s
178:	learn: 0.0257486	total: 3.19s	remaining: 14.6s
179:	learn: 0.0255751	total: 3.21s	remaining: 14.6s
180:	learn: 0

323:	learn: 0.0110840	total: 6.09s	remaining: 12.7s
324:	learn: 0.0110293	total: 6.11s	remaining: 12.7s
325:	learn: 0.0109752	total: 6.14s	remaining: 12.7s
326:	learn: 0.0109378	total: 6.16s	remaining: 12.7s
327:	learn: 0.0108896	total: 6.18s	remaining: 12.7s
328:	learn: 0.0108479	total: 6.2s	remaining: 12.7s
329:	learn: 0.0108067	total: 6.22s	remaining: 12.6s
330:	learn: 0.0107551	total: 6.25s	remaining: 12.6s
331:	learn: 0.0107147	total: 6.27s	remaining: 12.6s
332:	learn: 0.0106791	total: 6.29s	remaining: 12.6s
333:	learn: 0.0106393	total: 6.32s	remaining: 12.6s
334:	learn: 0.0105895	total: 6.34s	remaining: 12.6s
335:	learn: 0.0105505	total: 6.36s	remaining: 12.6s
336:	learn: 0.0105207	total: 6.39s	remaining: 12.6s
337:	learn: 0.0104824	total: 6.41s	remaining: 12.6s
338:	learn: 0.0104444	total: 6.43s	remaining: 12.5s
339:	learn: 0.0104152	total: 6.46s	remaining: 12.5s
340:	learn: 0.0103800	total: 6.48s	remaining: 12.5s
341:	learn: 0.0103450	total: 6.5s	remaining: 12.5s
342:	learn: 0.

486:	learn: 0.0064980	total: 9.3s	remaining: 9.8s
487:	learn: 0.0064803	total: 9.33s	remaining: 9.79s
488:	learn: 0.0064701	total: 9.35s	remaining: 9.77s
489:	learn: 0.0064571	total: 9.37s	remaining: 9.76s
490:	learn: 0.0064396	total: 9.4s	remaining: 9.74s
491:	learn: 0.0064261	total: 9.42s	remaining: 9.73s
492:	learn: 0.0064128	total: 9.43s	remaining: 9.7s
493:	learn: 0.0063956	total: 9.45s	remaining: 9.68s
494:	learn: 0.0063825	total: 9.46s	remaining: 9.66s
495:	learn: 0.0063655	total: 9.49s	remaining: 9.64s
496:	learn: 0.0063486	total: 9.51s	remaining: 9.63s
497:	learn: 0.0063357	total: 9.52s	remaining: 9.6s
498:	learn: 0.0063190	total: 9.55s	remaining: 9.59s
499:	learn: 0.0063024	total: 9.57s	remaining: 9.57s
500:	learn: 0.0062860	total: 9.59s	remaining: 9.55s
501:	learn: 0.0062697	total: 9.62s	remaining: 9.54s
502:	learn: 0.0062534	total: 9.64s	remaining: 9.53s
503:	learn: 0.0062400	total: 9.66s	remaining: 9.51s
504:	learn: 0.0062279	total: 9.69s	remaining: 9.49s
505:	learn: 0.006

645:	learn: 0.0047781	total: 12.9s	remaining: 7.07s
646:	learn: 0.0047703	total: 12.9s	remaining: 7.05s
647:	learn: 0.0047626	total: 12.9s	remaining: 7.03s
648:	learn: 0.0047547	total: 13s	remaining: 7.01s
649:	learn: 0.0047469	total: 13s	remaining: 7s
650:	learn: 0.0047392	total: 13s	remaining: 6.98s
651:	learn: 0.0047314	total: 13s	remaining: 6.96s
652:	learn: 0.0047236	total: 13.1s	remaining: 6.94s
653:	learn: 0.0047160	total: 13.1s	remaining: 6.92s
654:	learn: 0.0047092	total: 13.1s	remaining: 6.91s
655:	learn: 0.0047016	total: 13.1s	remaining: 6.89s
656:	learn: 0.0046927	total: 13.2s	remaining: 6.87s
657:	learn: 0.0046853	total: 13.2s	remaining: 6.85s
658:	learn: 0.0046780	total: 13.2s	remaining: 6.83s
659:	learn: 0.0046705	total: 13.2s	remaining: 6.81s
660:	learn: 0.0046630	total: 13.2s	remaining: 6.79s
661:	learn: 0.0046565	total: 13.3s	remaining: 6.78s
662:	learn: 0.0046504	total: 13.3s	remaining: 6.76s
663:	learn: 0.0046430	total: 13.3s	remaining: 6.74s
664:	learn: 0.0046356	t

804:	learn: 0.0038504	total: 16.5s	remaining: 4s
805:	learn: 0.0038461	total: 16.5s	remaining: 3.98s
806:	learn: 0.0038424	total: 16.6s	remaining: 3.96s
807:	learn: 0.0038375	total: 16.6s	remaining: 3.94s
808:	learn: 0.0038338	total: 16.6s	remaining: 3.92s
809:	learn: 0.0038290	total: 16.6s	remaining: 3.9s
810:	learn: 0.0038248	total: 16.7s	remaining: 3.88s
811:	learn: 0.0038211	total: 16.7s	remaining: 3.86s
812:	learn: 0.0038169	total: 16.7s	remaining: 3.84s
813:	learn: 0.0038127	total: 16.7s	remaining: 3.82s
814:	learn: 0.0038080	total: 16.8s	remaining: 3.8s
815:	learn: 0.0038038	total: 16.8s	remaining: 3.78s
816:	learn: 0.0037996	total: 16.8s	remaining: 3.76s
817:	learn: 0.0037949	total: 16.8s	remaining: 3.74s
818:	learn: 0.0037891	total: 16.8s	remaining: 3.72s
819:	learn: 0.0037848	total: 16.8s	remaining: 3.7s
820:	learn: 0.0037812	total: 16.9s	remaining: 3.68s
821:	learn: 0.0037771	total: 16.9s	remaining: 3.66s
822:	learn: 0.0037729	total: 16.9s	remaining: 3.64s
823:	learn: 0.0037

969:	learn: 0.0032380	total: 20.3s	remaining: 627ms
970:	learn: 0.0032348	total: 20.3s	remaining: 606ms
971:	learn: 0.0032321	total: 20.3s	remaining: 586ms
972:	learn: 0.0032289	total: 20.4s	remaining: 565ms
973:	learn: 0.0032263	total: 20.4s	remaining: 544ms
974:	learn: 0.0032230	total: 20.4s	remaining: 523ms
975:	learn: 0.0032200	total: 20.4s	remaining: 502ms
976:	learn: 0.0032173	total: 20.4s	remaining: 481ms
977:	learn: 0.0032147	total: 20.5s	remaining: 461ms
978:	learn: 0.0032120	total: 20.5s	remaining: 440ms
979:	learn: 0.0032090	total: 20.5s	remaining: 419ms
980:	learn: 0.0032064	total: 20.5s	remaining: 398ms
981:	learn: 0.0032038	total: 20.6s	remaining: 377ms
982:	learn: 0.0032012	total: 20.6s	remaining: 356ms
983:	learn: 0.0031975	total: 20.6s	remaining: 335ms
984:	learn: 0.0031949	total: 20.6s	remaining: 314ms
985:	learn: 0.0031922	total: 20.7s	remaining: 293ms
986:	learn: 0.0031897	total: 20.7s	remaining: 272ms
987:	learn: 0.0031871	total: 20.7s	remaining: 252ms
988:	learn: 

In [6]:
# AdaBoost
ada_model = ensemble.AdaBoostClassifier(random_state=417)

# Hyperparameters
n_estimators = [int(x) for x in np.linspace(50, 500, 20)]
learning_rate = [float(x) for x in np.linspace(0.001, 0.1, 10)]
param_grid = dict(
    n_estimators=n_estimators, 
    learning_rate=learning_rate)

# Random search
rand_search = RandomizedSearchCV(
    ada_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
ada_model = CatBoostClassifier(**rand_result.best_params_)
ada_model.fit(features, labels)

# Evaluate step
y_prob_ada = ada_model.predict_proba(x_valid)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_ada[:, 1])
print("ROC AUC: {:.4f}".format(roc_auc_score))

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s


{'n_estimators': 50, 'learning_rate': 0.05600000000000001}
0:	learn: 0.6253662	total: 2.96ms	remaining: 145ms
1:	learn: 0.5849338	total: 4.44ms	remaining: 107ms
2:	learn: 0.5362799	total: 5.49ms	remaining: 85.9ms
3:	learn: 0.5023256	total: 6.54ms	remaining: 75.2ms
4:	learn: 0.4682630	total: 8.11ms	remaining: 73ms
5:	learn: 0.4451078	total: 9.23ms	remaining: 67.7ms
6:	learn: 0.4168003	total: 10.3ms	remaining: 63.3ms
7:	learn: 0.3894231	total: 11.4ms	remaining: 59.9ms
8:	learn: 0.3669416	total: 12.4ms	remaining: 56.6ms
9:	learn: 0.3412792	total: 13.5ms	remaining: 54.1ms
10:	learn: 0.3246940	total: 14.6ms	remaining: 51.8ms
11:	learn: 0.3057067	total: 15.7ms	remaining: 49.9ms
12:	learn: 0.2893454	total: 16.9ms	remaining: 48ms
13:	learn: 0.2708628	total: 18ms	remaining: 46.2ms
14:	learn: 0.2622050	total: 18.9ms	remaining: 44.2ms
15:	learn: 0.2542613	total: 20ms	remaining: 42.5ms
16:	learn: 0.2467192	total: 21.2ms	remaining: 41.1ms
17:	learn: 0.2349478	total: 22.3ms	remaining: 39.7ms
18:	lea

[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    3.5s finished


In [7]:
# ExtraTrees
etc_model = ensemble.ExtraTreesClassifier(random_state=417)

# Hyperparameters
n_estimators = [int(x) for x in np.linspace(100, 1000, 20)]
max_depth = [int(x) for x in np.linspace(1, 10, 10)]
min_samples_split = [float(x) for x in np.linspace(0.1, 1.0, 10)]
min_samples_leaf = [float(x) for x in np.linspace(0.1, 1.0, 10)]
min_weight_fraction_leaf = [float(x) for x in np.linspace(0.1, 1.0, 10)]
bootstrap = [True, False]
param_grid = dict(
    n_estimators=n_estimators, 
    max_depth=max_depth, 
    min_samples_split=min_samples_split, 
    min_samples_leaf=min_samples_leaf, 
    min_weight_fraction_leaf=min_weight_fraction_leaf, 
    bootstrap=bootstrap)

# Random search
rand_search = RandomizedSearchCV(
    etc_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
etc_model = ensemble.ExtraTreesClassifier(**rand_result.best_params_)
etc_model.fit(features, labels)

# Evaluate step
y_prob_ada = etc_model.predict_proba(x_valid)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_ada[:, 1])
print("ROC AUC: {:.4f}".format(roc_auc_score))

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.


Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.4s
[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    3.5s finished


{'n_estimators': 668, 'min_weight_fraction_leaf': 0.2, 'min_samples_split': 0.1, 'min_samples_leaf': 0.2, 'max_depth': 7, 'bootstrap': True}
ROC AUC: 0.7333


In [8]:
# RandomForest
rfc_model = ensemble.RandomForestClassifier(random_state=417)

# Hyperparameters
n_estimators = [int(x) for x in np.linspace(50, 500, 20)]
max_depth = [int(x) for x in np.linspace(1, 15, 15)]
bootstrap = [True, False]
param_grid = dict(
    n_estimators=n_estimators, 
    max_depth=max_depth, 
    bootstrap=bootstrap)

# Random search
rand_search = RandomizedSearchCV(
    rfc_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
rfc_model = ensemble.RandomForestClassifier(**rand_result.best_params_)
rfc_model.fit(features, labels)

# Evaluate step
y_prob_ada = rfc_model.predict_proba(x_valid)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_ada[:, 1])
print("ROC AUC: {:.4f}".format(roc_auc_score))

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.6s
[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    3.2s finished


{'n_estimators': 239, 'max_depth': 10, 'bootstrap': True}
ROC AUC: 1.0000


In [13]:
# Naive Bayes classifier
gnb_model = GaussianNB()

# Hyperparameters
param_grid = dict()

# Random search
rand_search = RandomizedSearchCV(
    gnb_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
gnb_model = GaussianNB(**rand_result.best_params_)
gnb_model.fit(features, labels)

# Evaluate step
y_prob_gnb = gnb_model.predict_proba(x_valid)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_gnb[:, 1])
print("ROC AUC: {:.4f}".format(roc_auc_score))

Fitting 10 folds for each of 1 candidates, totalling 10 fits
{}
ROC AUC: 0.8875


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  10 out of  10 | elapsed:    0.0s finished


In [16]:
# MLP
mlp_model = MLPClassifier(random_state=417)

# Hyperparameters
hidden_layer_sizes = [int(x) for x in np.linspace(10, 100, 10)]
activation = ["identity", "logistic", "tanh", "relu"]
alpha = [float(x) for x in np.linspace(0.0001, 0.001, 10)]
momentum = [float(x) for x in np.linspace(0.1, 1, 10)]
early_stopping = [False]
param_grid = dict(
    hidden_layer_sizes=hidden_layer_sizes, 
    activation=activation,
    alpha=alpha, 
    momentum=momentum, 
    early_stopping=early_stopping)

# Random search
rand_search = RandomizedSearchCV(
    mlp_model, 
    param_grid, 
    scoring="roc_auc_ovr", 
    cv=ShuffleSplit(n_splits=10, test_size=0.3, random_state=417), 
    n_jobs=multiprocessing.cpu_count(), 
    verbose=1)
rand_result = rand_search.fit(x_train, y_train)
print(rand_result.best_params_)
mlp_model = MLPClassifier(**rand_result.best_params_)
mlp_model.fit(features, labels)

# Evaluate step
y_prob_mlp = mlp_model.predict_proba(x_valid)
roc_auc_score = metrics.roc_auc_score(y_valid, y_prob_mlp[:, 1])
print("ROC AUC: {:.4f}".format(roc_auc_score))

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s


{'momentum': 0.9, 'hidden_layer_sizes': 60, 'early_stopping': False, 'alpha': 0.0005, 'activation': 'tanh'}
ROC AUC: 0.7958


[Parallel(n_jobs=16)]: Done 100 out of 100 | elapsed:    0.5s finished


In [17]:
def transform_data(df, train_data=True, scale=None):
    
    # Get features and label
    if train_data: 
        # Remove "LOCKHART EUGENE E" and "THE TRAVEL AGENCY IN THE PARK" samples
        df = df.drop(index=107, axis=0)
        df = df.drop(index=65, axis=0)
        labels = df["poi"]
        features = df.drop(["poi"], axis=1)
    else: 
        labels = None
        features = df.copy()
    
    # Adding the proportions
    features.loc[:, "long_term_incentive_p"] = features.loc[:, "long_term_incentive"]/features.loc[:, "total_payments"]
    features.loc[:, "restricted_stock_deferred_p"] = features.loc[:, "restricted_stock_deferred"]/features.loc[:, "total_stock_value"]
    features.loc[:, "from_this_person_to_poi_p"] = features.loc[:, "from_this_person_to_poi"]/features.loc[:, "from_messages"]
    
    # Removing the original values.
    features.drop("long_term_incentive", axis=1)
    features.drop("restricted_stock_deferred", axis=1)
    features.drop("from_this_person_to_poi", axis=1)
    
    # Select numerical feature (float or integer)
    num_features = []
    for dtype, feature in zip(features.dtypes, features.columns):
        if dtype == 'float64' or dtype == 'int64':
            num_features.append(feature)
            
    # Fill nan
    imp_median = SimpleImputer(missing_values=np.nan, strategy='median')
    imp_median.fit(features[num_features])
    features = imp_median.transform(features[num_features])
    
    # Scale the data
    if scale is not None:
        if scale.lower() == "mmencoder":
            MMEncoder = MinMaxScaler()
            features = MMEncoder.fit_transform(features)
        elif scale.lower() == "ssencoder":
            SSEncoder = StandardScaler()
            features = SSEncoder.fit_transform(features)
        else:
            print("Only MinMaxScaler() and StandardScaler() are available.")
            
    return features, labels

In [23]:
# Read in test data
test = pd.read_csv("test_features.csv")
x_test, _ = transform_data(test, train_data=False, scale=None)

# Pass in model
test_prob_lgm = lgb_model.predict_proba(x_test)[:, 1]
test_prob_xgb = xgb_model.predict_proba(x_test)[:, 1]
test_prob_cat = cat_model.predict_proba(x_test)[:, 1]
test_prob_ada = ada_model.predict_proba(x_test)[:, 1]
test_prob_rfc = rfc_model.predict_proba(x_test)[:, 1]
test_prob_gnb = gnb_model.predict_proba(x_test)[:, 1]
test_prob_mlp = mlp_model.predict_proba(x_test)[:, 1]

blending_prob = (test_prob_lgm + test_prob_xgb + test_prob_cat + 
                 test_prob_ada + test_prob_rfc + test_prob_gnb + test_prob_mlp) / 7

submit = pd.read_csv("sample_submission.csv")
submit['poi'] = blending_prob
submit.to_csv('full_blending_7.csv', index=False)

In [24]:
from mlxtend.classifier import StackingClassifier

meta_estimator = ensemble.GradientBoostingClassifier(
    tol=100, subsample=0.70, n_estimators=50, max_features='sqrt', max_depth=4, learning_rate=0.3)
stacking_model = StackingClassifier(
    classifiers=[lgb_model, xgb_model, cat_model, ada_model, rfc_model, gnb_model, mlp_model], 
    meta_classifier=meta_estimator, 
    use_probas=True, 
    average_probas=False)
stacking_model.fit(features, labels)
stacking_prob = stacking_model.predict_proba(x_test)[:, 1]

submit = pd.read_csv("sample_submission.csv")
submit['poi'] = stacking_prob
submit.to_csv('Submission_20200627_stacking.csv', index=False)

0:	learn: 0.6510419	total: 3.27ms	remaining: 3.27s
1:	learn: 0.6230358	total: 5.48ms	remaining: 2.73s
2:	learn: 0.5940834	total: 28.1ms	remaining: 9.35s
3:	learn: 0.5689751	total: 53.1ms	remaining: 13.2s
4:	learn: 0.5484372	total: 76.8ms	remaining: 15.3s
5:	learn: 0.5255679	total: 78.4ms	remaining: 13s
6:	learn: 0.5036298	total: 99.7ms	remaining: 14.1s
7:	learn: 0.4825145	total: 122ms	remaining: 15.1s
8:	learn: 0.4644286	total: 144ms	remaining: 15.8s
9:	learn: 0.4474000	total: 167ms	remaining: 16.5s
10:	learn: 0.4270299	total: 190ms	remaining: 17s
11:	learn: 0.4139077	total: 196ms	remaining: 16.1s
12:	learn: 0.3989677	total: 219ms	remaining: 16.6s
13:	learn: 0.3829151	total: 240ms	remaining: 16.9s
14:	learn: 0.3652994	total: 264ms	remaining: 17.3s
15:	learn: 0.3502150	total: 274ms	remaining: 16.9s
16:	learn: 0.3367749	total: 294ms	remaining: 17s
17:	learn: 0.3237851	total: 316ms	remaining: 17.2s
18:	learn: 0.3124842	total: 339ms	remaining: 17.5s
19:	learn: 0.3023556	total: 350ms	remain

171:	learn: 0.0276302	total: 3.05s	remaining: 14.7s
172:	learn: 0.0273785	total: 3.06s	remaining: 14.6s
173:	learn: 0.0270099	total: 3.08s	remaining: 14.6s
174:	learn: 0.0267759	total: 3.1s	remaining: 14.6s
175:	learn: 0.0265004	total: 3.13s	remaining: 14.6s
176:	learn: 0.0262597	total: 3.15s	remaining: 14.6s
177:	learn: 0.0260542	total: 3.17s	remaining: 14.6s
178:	learn: 0.0257486	total: 3.18s	remaining: 14.6s
179:	learn: 0.0255751	total: 3.2s	remaining: 14.6s
180:	learn: 0.0254235	total: 3.22s	remaining: 14.6s
181:	learn: 0.0252405	total: 3.24s	remaining: 14.6s
182:	learn: 0.0250966	total: 3.27s	remaining: 14.6s
183:	learn: 0.0248874	total: 3.29s	remaining: 14.6s
184:	learn: 0.0246561	total: 3.31s	remaining: 14.6s
185:	learn: 0.0244032	total: 3.33s	remaining: 14.6s
186:	learn: 0.0242263	total: 3.35s	remaining: 14.6s
187:	learn: 0.0240216	total: 3.37s	remaining: 14.6s
188:	learn: 0.0237524	total: 3.39s	remaining: 14.6s
189:	learn: 0.0235830	total: 3.42s	remaining: 14.6s
190:	learn: 0.

337:	learn: 0.0104824	total: 6.33s	remaining: 12.4s
338:	learn: 0.0104444	total: 6.35s	remaining: 12.4s
339:	learn: 0.0104152	total: 6.37s	remaining: 12.4s
340:	learn: 0.0103800	total: 6.4s	remaining: 12.4s
341:	learn: 0.0103450	total: 6.42s	remaining: 12.4s
342:	learn: 0.0103094	total: 6.44s	remaining: 12.3s
343:	learn: 0.0102810	total: 6.46s	remaining: 12.3s
344:	learn: 0.0102346	total: 6.49s	remaining: 12.3s
345:	learn: 0.0101984	total: 6.51s	remaining: 12.3s
346:	learn: 0.0101625	total: 6.53s	remaining: 12.3s
347:	learn: 0.0101174	total: 6.56s	remaining: 12.3s
348:	learn: 0.0100817	total: 6.58s	remaining: 12.3s
349:	learn: 0.0100578	total: 6.6s	remaining: 12.3s
350:	learn: 0.0100136	total: 6.62s	remaining: 12.2s
351:	learn: 0.0099700	total: 6.64s	remaining: 12.2s
352:	learn: 0.0099268	total: 6.67s	remaining: 12.2s
353:	learn: 0.0098840	total: 6.69s	remaining: 12.2s
354:	learn: 0.0098303	total: 6.7s	remaining: 12.2s
355:	learn: 0.0098028	total: 6.72s	remaining: 12.2s
356:	learn: 0.0

502:	learn: 0.0062534	total: 9.52s	remaining: 9.4s
503:	learn: 0.0062400	total: 9.54s	remaining: 9.38s
504:	learn: 0.0062279	total: 9.56s	remaining: 9.37s
505:	learn: 0.0062119	total: 9.58s	remaining: 9.35s
506:	learn: 0.0061960	total: 9.6s	remaining: 9.34s
507:	learn: 0.0061837	total: 9.61s	remaining: 9.31s
508:	learn: 0.0061680	total: 9.64s	remaining: 9.29s
509:	learn: 0.0061524	total: 9.66s	remaining: 9.28s
510:	learn: 0.0061369	total: 9.68s	remaining: 9.26s
511:	learn: 0.0061216	total: 9.7s	remaining: 9.25s
512:	learn: 0.0061062	total: 9.72s	remaining: 9.23s
513:	learn: 0.0060911	total: 9.75s	remaining: 9.22s
514:	learn: 0.0060760	total: 9.77s	remaining: 9.2s
515:	learn: 0.0060609	total: 9.79s	remaining: 9.19s
516:	learn: 0.0060460	total: 9.82s	remaining: 9.17s
517:	learn: 0.0060306	total: 9.84s	remaining: 9.15s
518:	learn: 0.0060153	total: 9.86s	remaining: 9.14s
519:	learn: 0.0060007	total: 9.88s	remaining: 9.12s
520:	learn: 0.0059861	total: 9.91s	remaining: 9.11s
521:	learn: 0.00

665:	learn: 0.0046295	total: 13.2s	remaining: 6.6s
666:	learn: 0.0046221	total: 13.2s	remaining: 6.59s
667:	learn: 0.0046158	total: 13.2s	remaining: 6.57s
668:	learn: 0.0046095	total: 13.2s	remaining: 6.55s
669:	learn: 0.0046032	total: 13.3s	remaining: 6.53s
670:	learn: 0.0045979	total: 13.3s	remaining: 6.51s
671:	learn: 0.0045916	total: 13.3s	remaining: 6.49s
672:	learn: 0.0045844	total: 13.3s	remaining: 6.47s
673:	learn: 0.0045773	total: 13.3s	remaining: 6.46s
674:	learn: 0.0045701	total: 13.4s	remaining: 6.44s
675:	learn: 0.0045641	total: 13.4s	remaining: 6.42s
676:	learn: 0.0045570	total: 13.4s	remaining: 6.4s
677:	learn: 0.0045499	total: 13.4s	remaining: 6.38s
678:	learn: 0.0045448	total: 13.5s	remaining: 6.37s
679:	learn: 0.0045396	total: 13.5s	remaining: 6.35s
680:	learn: 0.0045326	total: 13.5s	remaining: 6.33s
681:	learn: 0.0045256	total: 13.5s	remaining: 6.31s
682:	learn: 0.0045187	total: 13.6s	remaining: 6.29s
683:	learn: 0.0045136	total: 13.6s	remaining: 6.27s
684:	learn: 0.

826:	learn: 0.0037570	total: 16.8s	remaining: 3.5s
827:	learn: 0.0037528	total: 16.8s	remaining: 3.48s
828:	learn: 0.0037488	total: 16.8s	remaining: 3.46s
829:	learn: 0.0037431	total: 16.8s	remaining: 3.44s
830:	learn: 0.0037378	total: 16.8s	remaining: 3.42s
831:	learn: 0.0037333	total: 16.8s	remaining: 3.4s
832:	learn: 0.0037292	total: 16.9s	remaining: 3.38s
833:	learn: 0.0037251	total: 16.9s	remaining: 3.36s
834:	learn: 0.0037213	total: 16.9s	remaining: 3.34s
835:	learn: 0.0037173	total: 16.9s	remaining: 3.32s
836:	learn: 0.0037133	total: 17s	remaining: 3.3s
837:	learn: 0.0037093	total: 17s	remaining: 3.28s
838:	learn: 0.0037054	total: 17s	remaining: 3.26s
839:	learn: 0.0037014	total: 17s	remaining: 3.24s
840:	learn: 0.0036974	total: 17s	remaining: 3.22s
841:	learn: 0.0036934	total: 17.1s	remaining: 3.2s
842:	learn: 0.0036894	total: 17.1s	remaining: 3.18s
843:	learn: 0.0036855	total: 17.1s	remaining: 3.16s
844:	learn: 0.0036811	total: 17.1s	remaining: 3.14s
845:	learn: 0.0036767	tota

993:	learn: 0.0031710	total: 20.5s	remaining: 123ms
994:	learn: 0.0031684	total: 20.5s	remaining: 103ms
995:	learn: 0.0031658	total: 20.5s	remaining: 82.3ms
996:	learn: 0.0031632	total: 20.5s	remaining: 61.7ms
997:	learn: 0.0031606	total: 20.5s	remaining: 41.2ms
998:	learn: 0.0031581	total: 20.6s	remaining: 20.6ms
999:	learn: 0.0031551	total: 20.6s	remaining: 0us
0:	learn: 0.6253662	total: 2.83ms	remaining: 139ms
1:	learn: 0.5849338	total: 4.1ms	remaining: 98.5ms
2:	learn: 0.5362799	total: 5.14ms	remaining: 80.5ms
3:	learn: 0.5023256	total: 6.21ms	remaining: 71.4ms
4:	learn: 0.4682630	total: 7.27ms	remaining: 65.5ms
5:	learn: 0.4451078	total: 8.33ms	remaining: 61.1ms
6:	learn: 0.4168003	total: 9.39ms	remaining: 57.7ms
7:	learn: 0.3894231	total: 10.4ms	remaining: 54.5ms
8:	learn: 0.3669416	total: 11.7ms	remaining: 53.2ms
9:	learn: 0.3412792	total: 12.8ms	remaining: 51.3ms
10:	learn: 0.3246940	total: 13.9ms	remaining: 49.4ms
11:	learn: 0.3057067	total: 15ms	remaining: 47.6ms
12:	learn: 0

In [25]:
# https://www.kaggle.com/c/2020-ml100marathon-midterm/submissions
# https://www.kaggle.com/c/ml100/leaderboard
! kaggle competitions submit -c 2020-ml100marathon-midterm -f full_blending_7.csv -m "Full blending 7 models"

Successfully submitted to 2020_ML100Marathon Midterm



  0%|          | 0.00/1.18k [00:00<?, ?B/s]
100%|██████████| 1.18k/1.18k [00:05<00:00, 215B/s]


In [1]:
# https://www.kaggle.com/c/2020-ml100marathon-midterm/submissions
! kaggle competitions submit -c 2020-ml100marathon-midterm -f Submission_20200627_stacking.csv -m "Full stacking 7 models"

Successfully submitted to 2020_ML100Marathon Midterm



  0%|          | 0.00/1.24k [00:00<?, ?B/s]
100%|██████████| 1.24k/1.24k [00:09<00:00, 132B/s]
