In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_validate
from sklearn.preprocessing import LabelEncoder

import xgboost

sns.set_theme(style="whitegrid")
sns.set_palette(palette="Paired")
SEED: int = 42

In [2]:
train_data = pd.read_csv("../../data/processed/train_data_cleaned.csv", index_col="building_id")
y_train = pd.read_csv("../../data/processed/train_labels.csv", index_col="building_id", usecols=["building_id", "damage_grade"])

In [3]:
encoder = LabelEncoder()
train_labels = encoder.fit_transform(y_train["damage_grade"].to_numpy())

In [4]:
def hyperparameter_optimization(model=None, hyperparameter_grid=None, train_data=None, train_labels=None, scoring=None):
    # Get train labels in shape that .fit() expects
    if model == "XGBoost":
        # XGBoost expects [0, 1, 2] instead of [1, 2, 3]
        encoder = LabelEncoder()
        train_labels = encoder.fit_transform(train_labels["damage_grade"].to_numpy())
    else:
        train_labels = train_labels["damage_grade"].ravel()

    # Choose model based on input
    if model == "Dummy":
        model = DummyClassifier(strategy="most_frequent")
    elif model == "RandomForest":
        print("Fitting RandomForest ...")
        model = RandomForestClassifier(random_state=42)
    elif model == "DecisionTree":
        print("Fitting DecisionTree ...")
        model = DecisionTreeClassifier(random_state=42)
    elif model == "XGBoost":
        print("Fitting XGBoost ...")
        #model = xgboost.XGBClassifier(random_state=42, n_jobs=-1)
        model = xgboost.XGBClassifier(n_estimators=100,
                                      max_depth=20,
                                      learning_rate=0.1,
                                      subsample=0.8,
                                      colsample_bytree=0.8,
                                      random_state=42,
                                      n_jobs=-1)


    cv_results = cross_validate(model, train_data, train_labels, cv=5,
                                scoring=scoring,
                                n_jobs=-1,
                                return_train_score=True)
    model.fit(train_data, train_labels)


    print("")
    print(f"CV Training ACC: {round(np.mean(cv_results['train_accuracy']), 4)} +/- {round(np.std(cv_results['train_accuracy']), 4)} ")
    print(f"CV Test ACC: {round(np.mean(cv_results['test_accuracy']), 4)} +/- {round(np.std(cv_results['test_accuracy']), 4)}")
    print("")
    print(f"CV Training MCC: {round(np.mean(cv_results['train_matthews_corrcoef']), 4)} +/- {round(np.std(cv_results['train_matthews_corrcoef']), 4)} ")
    print(f"CV Test MCC: {round(np.mean(cv_results['test_matthews_corrcoef']), 4)} +/- {round(np.std(cv_results['test_matthews_corrcoef']), 4)}")
    print("")
    
    return model, cv_results

# Baseline (XGBoost)

In [5]:
%%time

model, cv_results = hyperparameter_optimization(model="XGBoost",
                                                train_data=train_data[:10000],
                                                train_labels=y_train[:10000],
                                                scoring=["accuracy", "matthews_corrcoef"])

Fitting XGBoost ...

CV Training ACC: 0.9991 +/- 0.0002 
CV Test ACC: 0.6816 +/- 0.0068

CV Training MCC: 0.9984 +/- 0.0004 
CV Test MCC: 0.3998 +/- 0.0124

CPU times: user 29.5 s, sys: 1.18 s, total: 30.7 s
Wall time: 37.6 s


# LGBM

In [6]:
import lightgbm

In [7]:
%%time

# Use model with default parameters
lgbm = lightgbm.LGBMClassifier()

model, cv_results = hyperparameter_optimization(model=lgbm,
                                                train_data=train_data[:10000],
                                                train_labels=y_train[:10000],
                                                scoring=["accuracy", "matthews_corrcoef"])


CV Training ACC: 0.82 +/- 0.0018 
CV Test ACC: 0.6875 +/- 0.0114

CV Training MCC: 0.6689 +/- 0.0035 
CV Test MCC: 0.4105 +/- 0.0212

CPU times: user 1.57 s, sys: 105 ms, total: 1.68 s
Wall time: 4.93 s


In [8]:
%%time

# Use model with same parameters as XGBoost - as far as possible
lgbm_2 = lightgbm.LGBMClassifier(n_estimators=100,
                                 max_depth=20,
                                 learning_rate=0.1,
                                 subsample=0.8,
                                 colsample_bytree=0.8,
                                 random_state=42,
                                 n_jobs=-1)

model, cv_results = hyperparameter_optimization(model=lgbm_2,
                                                train_data=train_data[:10000],
                                                train_labels=y_train[:10000],
                                                scoring=["accuracy", "matthews_corrcoef"])


CV Training ACC: 0.8178 +/- 0.0026 
CV Test ACC: 0.6839 +/- 0.0121

CV Training MCC: 0.6645 +/- 0.0049 
CV Test MCC: 0.4026 +/- 0.0223

CPU times: user 1.84 s, sys: 47.2 ms, total: 1.89 s
Wall time: 2.61 s


# CaTBoost

In [9]:
import catboost

In [10]:
%%time

# Use model with default parameters
catclf = catboost.CatBoostClassifier()

model, cv_results = hyperparameter_optimization(model=catclf,
                                                train_data=train_data[:10000],
                                                train_labels=y_train[:10000],
                                                scoring=["accuracy", "matthews_corrcoef"])

Learning rate set to 0.087979
0:	learn: 1.0602394	total: 61.8ms	remaining: 1m 1s
1:	learn: 1.0289560	total: 85.9ms	remaining: 42.9s
2:	learn: 0.9976894	total: 114ms	remaining: 37.8s
3:	learn: 0.9739553	total: 142ms	remaining: 35.2s
4:	learn: 0.9489785	total: 162ms	remaining: 32.3s
5:	learn: 0.9300524	total: 183ms	remaining: 30.3s
6:	learn: 0.9144847	total: 213ms	remaining: 30.2s
7:	learn: 0.9000868	total: 225ms	remaining: 27.9s
8:	learn: 0.8855052	total: 263ms	remaining: 29s
9:	learn: 0.8697866	total: 283ms	remaining: 28s
10:	learn: 0.8547823	total: 291ms	remaining: 26.1s
11:	learn: 0.8418605	total: 302ms	remaining: 24.9s
12:	learn: 0.8319662	total: 334ms	remaining: 25.4s
13:	learn: 0.8234393	total: 359ms	remaining: 25.3s
14:	learn: 0.8156365	total: 388ms	remaining: 25.5s
15:	learn: 0.8066389	total: 412ms	remaining: 25.3s
16:	learn: 0.8007204	total: 429ms	remaining: 24.8s
17:	learn: 0.7961444	total: 453ms	remaining: 24.7s
18:	learn: 0.7903681	total: 494ms	remaining: 25.5s
19:	learn: 0.

159:	learn: 0.6418784	total: 4.05s	remaining: 21.3s
160:	learn: 0.6414620	total: 4.08s	remaining: 21.3s
161:	learn: 0.6409245	total: 4.12s	remaining: 21.3s
162:	learn: 0.6402289	total: 4.16s	remaining: 21.4s
163:	learn: 0.6398910	total: 4.2s	remaining: 21.4s
164:	learn: 0.6392996	total: 4.23s	remaining: 21.4s
165:	learn: 0.6387927	total: 4.28s	remaining: 21.5s
166:	learn: 0.6383853	total: 4.31s	remaining: 21.5s
167:	learn: 0.6378136	total: 4.37s	remaining: 21.6s
168:	learn: 0.6373191	total: 4.4s	remaining: 21.6s
169:	learn: 0.6369957	total: 4.43s	remaining: 21.6s
170:	learn: 0.6366364	total: 4.46s	remaining: 21.6s
171:	learn: 0.6359621	total: 4.5s	remaining: 21.7s
172:	learn: 0.6355600	total: 4.54s	remaining: 21.7s
173:	learn: 0.6352235	total: 4.58s	remaining: 21.8s
174:	learn: 0.6346342	total: 4.61s	remaining: 21.7s
175:	learn: 0.6340254	total: 4.65s	remaining: 21.8s
176:	learn: 0.6333356	total: 4.71s	remaining: 21.9s
177:	learn: 0.6330175	total: 4.76s	remaining: 22s
178:	learn: 0.632

159:	learn: 0.6455292	total: 4.56s	remaining: 24s
160:	learn: 0.6451468	total: 4.61s	remaining: 24s
161:	learn: 0.6446824	total: 4.65s	remaining: 24.1s
162:	learn: 0.6443258	total: 4.69s	remaining: 24.1s
163:	learn: 0.6437058	total: 4.77s	remaining: 24.3s
164:	learn: 0.6431850	total: 4.79s	remaining: 24.2s
165:	learn: 0.6428052	total: 4.83s	remaining: 24.3s
166:	learn: 0.6423419	total: 4.89s	remaining: 24.4s
167:	learn: 0.6417282	total: 4.92s	remaining: 24.4s
168:	learn: 0.6412514	total: 4.95s	remaining: 24.4s
169:	learn: 0.6406642	total: 4.98s	remaining: 24.3s
170:	learn: 0.6402497	total: 5.01s	remaining: 24.3s
171:	learn: 0.6397443	total: 5.04s	remaining: 24.3s
172:	learn: 0.6394024	total: 5.06s	remaining: 24.2s
173:	learn: 0.6390573	total: 5.07s	remaining: 24.1s
174:	learn: 0.6385413	total: 5.08s	remaining: 23.9s
175:	learn: 0.6378477	total: 5.1s	remaining: 23.9s
176:	learn: 0.6374335	total: 5.13s	remaining: 23.9s
177:	learn: 0.6367189	total: 5.16s	remaining: 23.8s
178:	learn: 0.636

317:	learn: 0.5802140	total: 8.64s	remaining: 18.5s
318:	learn: 0.5798822	total: 8.65s	remaining: 18.5s
319:	learn: 0.5795760	total: 8.67s	remaining: 18.4s
320:	learn: 0.5792363	total: 8.68s	remaining: 18.4s
321:	learn: 0.5789619	total: 8.69s	remaining: 18.3s
322:	learn: 0.5787145	total: 8.71s	remaining: 18.3s
323:	learn: 0.5784391	total: 8.72s	remaining: 18.2s
324:	learn: 0.5782112	total: 8.75s	remaining: 18.2s
325:	learn: 0.5779555	total: 8.79s	remaining: 18.2s
326:	learn: 0.5775344	total: 8.82s	remaining: 18.1s
327:	learn: 0.5773559	total: 8.83s	remaining: 18.1s
328:	learn: 0.5768971	total: 8.84s	remaining: 18s
329:	learn: 0.5764387	total: 8.87s	remaining: 18s
330:	learn: 0.5761102	total: 8.88s	remaining: 17.9s
331:	learn: 0.5756570	total: 8.9s	remaining: 17.9s
332:	learn: 0.5754917	total: 8.93s	remaining: 17.9s
333:	learn: 0.5752053	total: 8.97s	remaining: 17.9s
334:	learn: 0.5748334	total: 9.03s	remaining: 17.9s
335:	learn: 0.5745058	total: 9.06s	remaining: 17.9s
336:	learn: 0.574

475:	learn: 0.5269745	total: 10.5s	remaining: 11.6s
476:	learn: 0.5267466	total: 10.6s	remaining: 11.6s
477:	learn: 0.5265979	total: 10.6s	remaining: 11.6s
478:	learn: 0.5262686	total: 10.7s	remaining: 11.6s
479:	learn: 0.5260774	total: 10.7s	remaining: 11.6s
480:	learn: 0.5258607	total: 10.7s	remaining: 11.6s
481:	learn: 0.5255676	total: 10.7s	remaining: 11.5s
482:	learn: 0.5253727	total: 10.7s	remaining: 11.5s
483:	learn: 0.5250869	total: 10.8s	remaining: 11.5s
484:	learn: 0.5247824	total: 10.8s	remaining: 11.5s
485:	learn: 0.5246084	total: 10.8s	remaining: 11.4s
486:	learn: 0.5244200	total: 10.8s	remaining: 11.4s
487:	learn: 0.5240615	total: 10.8s	remaining: 11.3s
488:	learn: 0.5238220	total: 10.8s	remaining: 11.3s
489:	learn: 0.5235275	total: 10.8s	remaining: 11.3s
490:	learn: 0.5230398	total: 10.9s	remaining: 11.3s
491:	learn: 0.5228493	total: 10.9s	remaining: 11.2s
492:	learn: 0.5224541	total: 10.9s	remaining: 11.2s
493:	learn: 0.5219296	total: 10.9s	remaining: 11.2s
494:	learn: 

475:	learn: 0.5228311	total: 12.4s	remaining: 13.6s
476:	learn: 0.5225231	total: 12.4s	remaining: 13.6s
477:	learn: 0.5222379	total: 12.4s	remaining: 13.6s
478:	learn: 0.5220551	total: 12.5s	remaining: 13.6s
479:	learn: 0.5217120	total: 12.5s	remaining: 13.5s
480:	learn: 0.5212498	total: 12.5s	remaining: 13.5s
481:	learn: 0.5210713	total: 12.5s	remaining: 13.5s
482:	learn: 0.5207222	total: 12.5s	remaining: 13.4s
483:	learn: 0.5205641	total: 12.6s	remaining: 13.4s
484:	learn: 0.5203016	total: 12.6s	remaining: 13.4s
485:	learn: 0.5198959	total: 12.6s	remaining: 13.3s
486:	learn: 0.5196043	total: 12.6s	remaining: 13.3s
487:	learn: 0.5193667	total: 12.6s	remaining: 13.2s
488:	learn: 0.5191434	total: 12.6s	remaining: 13.2s
489:	learn: 0.5190004	total: 12.6s	remaining: 13.2s
490:	learn: 0.5187654	total: 12.7s	remaining: 13.2s
491:	learn: 0.5183814	total: 12.7s	remaining: 13.1s
492:	learn: 0.5181103	total: 12.7s	remaining: 13.1s
493:	learn: 0.5179382	total: 12.8s	remaining: 13.1s
494:	learn: 

633:	learn: 0.4863132	total: 13.1s	remaining: 7.56s
634:	learn: 0.4860257	total: 13.1s	remaining: 7.54s
635:	learn: 0.4857367	total: 13.1s	remaining: 7.51s
636:	learn: 0.4855634	total: 13.2s	remaining: 7.49s
637:	learn: 0.4853918	total: 13.2s	remaining: 7.47s
638:	learn: 0.4850596	total: 13.2s	remaining: 7.45s
639:	learn: 0.4847977	total: 13.2s	remaining: 7.42s
640:	learn: 0.4845871	total: 13.2s	remaining: 7.39s
641:	learn: 0.4843622	total: 13.2s	remaining: 7.36s
642:	learn: 0.4842459	total: 13.2s	remaining: 7.34s
643:	learn: 0.4839520	total: 13.3s	remaining: 7.37s
644:	learn: 0.4837679	total: 13.4s	remaining: 7.35s
645:	learn: 0.4836228	total: 13.4s	remaining: 7.34s
646:	learn: 0.4834723	total: 13.4s	remaining: 7.31s
647:	learn: 0.4832454	total: 13.4s	remaining: 7.29s
648:	learn: 0.4830680	total: 13.4s	remaining: 7.26s
649:	learn: 0.4828821	total: 13.4s	remaining: 7.23s
650:	learn: 0.4826160	total: 13.4s	remaining: 7.21s
651:	learn: 0.4824127	total: 13.5s	remaining: 7.2s
652:	learn: 0

634:	learn: 0.4814883	total: 15.3s	remaining: 8.8s
635:	learn: 0.4812383	total: 15.3s	remaining: 8.77s
636:	learn: 0.4810591	total: 15.3s	remaining: 8.74s
637:	learn: 0.4807690	total: 15.4s	remaining: 8.72s
638:	learn: 0.4805999	total: 15.4s	remaining: 8.7s
639:	learn: 0.4803932	total: 15.4s	remaining: 8.68s
640:	learn: 0.4801286	total: 15.5s	remaining: 8.67s
641:	learn: 0.4799612	total: 15.5s	remaining: 8.65s
642:	learn: 0.4798218	total: 15.5s	remaining: 8.63s
643:	learn: 0.4796581	total: 15.6s	remaining: 8.6s
644:	learn: 0.4793143	total: 15.6s	remaining: 8.58s
645:	learn: 0.4792449	total: 15.6s	remaining: 8.55s
646:	learn: 0.4790713	total: 15.6s	remaining: 8.54s
647:	learn: 0.4787487	total: 15.7s	remaining: 8.52s
648:	learn: 0.4784158	total: 15.7s	remaining: 8.5s
649:	learn: 0.4782000	total: 15.8s	remaining: 8.48s
650:	learn: 0.4779208	total: 15.8s	remaining: 8.46s
651:	learn: 0.4777899	total: 15.8s	remaining: 8.43s
652:	learn: 0.4774557	total: 15.8s	remaining: 8.41s
653:	learn: 0.47

791:	learn: 0.4515646	total: 17s	remaining: 4.47s
792:	learn: 0.4514069	total: 17s	remaining: 4.44s
793:	learn: 0.4511530	total: 17s	remaining: 4.42s
794:	learn: 0.4509867	total: 17.1s	remaining: 4.4s
795:	learn: 0.4507387	total: 17.1s	remaining: 4.38s
796:	learn: 0.4504963	total: 17.1s	remaining: 4.35s
797:	learn: 0.4502882	total: 17.1s	remaining: 4.33s
798:	learn: 0.4500441	total: 17.1s	remaining: 4.31s
799:	learn: 0.4499337	total: 17.1s	remaining: 4.29s
800:	learn: 0.4497660	total: 17.2s	remaining: 4.26s
801:	learn: 0.4496040	total: 17.2s	remaining: 4.24s
802:	learn: 0.4493346	total: 17.2s	remaining: 4.22s
803:	learn: 0.4490941	total: 17.2s	remaining: 4.19s
804:	learn: 0.4487959	total: 17.3s	remaining: 4.18s
805:	learn: 0.4485435	total: 17.3s	remaining: 4.17s
806:	learn: 0.4483667	total: 17.4s	remaining: 4.15s
807:	learn: 0.4481380	total: 17.4s	remaining: 4.13s
808:	learn: 0.4477567	total: 17.4s	remaining: 4.12s
809:	learn: 0.4476265	total: 17.5s	remaining: 4.1s
810:	learn: 0.447464

792:	learn: 0.4475989	total: 19.7s	remaining: 5.14s
793:	learn: 0.4474061	total: 19.7s	remaining: 5.11s
794:	learn: 0.4472379	total: 19.7s	remaining: 5.08s
795:	learn: 0.4470316	total: 19.7s	remaining: 5.06s
796:	learn: 0.4467787	total: 19.7s	remaining: 5.03s
797:	learn: 0.4465163	total: 19.7s	remaining: 5s
798:	learn: 0.4462353	total: 19.8s	remaining: 4.97s
799:	learn: 0.4460342	total: 19.8s	remaining: 4.95s
800:	learn: 0.4458219	total: 19.8s	remaining: 4.92s
801:	learn: 0.4455699	total: 19.8s	remaining: 4.89s
802:	learn: 0.4454581	total: 19.8s	remaining: 4.86s
803:	learn: 0.4450172	total: 19.9s	remaining: 4.84s
804:	learn: 0.4449135	total: 19.9s	remaining: 4.82s
805:	learn: 0.4446414	total: 19.9s	remaining: 4.79s
806:	learn: 0.4443910	total: 20s	remaining: 4.77s
807:	learn: 0.4442584	total: 20s	remaining: 4.75s
808:	learn: 0.4441063	total: 20s	remaining: 4.72s
809:	learn: 0.4440082	total: 20s	remaining: 4.7s
810:	learn: 0.4437478	total: 20.1s	remaining: 4.68s
811:	learn: 0.4436131	to

949:	learn: 0.4219789	total: 21.1s	remaining: 1.11s
950:	learn: 0.4217861	total: 21.1s	remaining: 1.09s
951:	learn: 0.4215467	total: 21.1s	remaining: 1.07s
952:	learn: 0.4213395	total: 21.2s	remaining: 1.04s
953:	learn: 0.4212018	total: 21.2s	remaining: 1.02s
954:	learn: 0.4211219	total: 21.2s	remaining: 999ms
955:	learn: 0.4209814	total: 21.2s	remaining: 977ms
956:	learn: 0.4207921	total: 21.2s	remaining: 954ms
957:	learn: 0.4206747	total: 21.3s	remaining: 936ms
958:	learn: 0.4205793	total: 21.4s	remaining: 914ms
959:	learn: 0.4204509	total: 21.4s	remaining: 892ms
960:	learn: 0.4201572	total: 21.4s	remaining: 869ms
961:	learn: 0.4199209	total: 21.5s	remaining: 848ms
962:	learn: 0.4195948	total: 21.5s	remaining: 825ms
963:	learn: 0.4193963	total: 21.5s	remaining: 802ms
964:	learn: 0.4192224	total: 21.5s	remaining: 779ms
965:	learn: 0.4190084	total: 21.5s	remaining: 756ms
966:	learn: 0.4188429	total: 21.5s	remaining: 734ms
967:	learn: 0.4186956	total: 21.5s	remaining: 712ms
968:	learn: 

266:	learn: 0.5907272	total: 3.85s	remaining: 10.6s
267:	learn: 0.5903311	total: 3.86s	remaining: 10.5s
268:	learn: 0.5899538	total: 3.86s	remaining: 10.5s
269:	learn: 0.5897609	total: 3.87s	remaining: 10.5s
270:	learn: 0.5891494	total: 3.87s	remaining: 10.4s
271:	learn: 0.5888593	total: 3.88s	remaining: 10.4s
272:	learn: 0.5884832	total: 3.88s	remaining: 10.3s
273:	learn: 0.5881442	total: 3.89s	remaining: 10.3s
274:	learn: 0.5874641	total: 3.89s	remaining: 10.3s
275:	learn: 0.5870588	total: 3.9s	remaining: 10.2s
276:	learn: 0.5866018	total: 3.9s	remaining: 10.2s
277:	learn: 0.5863415	total: 3.91s	remaining: 10.2s
278:	learn: 0.5859080	total: 3.91s	remaining: 10.1s
279:	learn: 0.5856977	total: 3.92s	remaining: 10.1s
280:	learn: 0.5855265	total: 3.92s	remaining: 10s
281:	learn: 0.5850096	total: 3.93s	remaining: 10s
282:	learn: 0.5847027	total: 3.93s	remaining: 9.97s
283:	learn: 0.5838542	total: 3.94s	remaining: 9.93s
284:	learn: 0.5834307	total: 3.94s	remaining: 9.89s
285:	learn: 0.5829

582:	learn: 0.4937526	total: 5.59s	remaining: 4s
583:	learn: 0.4934157	total: 5.59s	remaining: 3.98s
584:	learn: 0.4931421	total: 5.6s	remaining: 3.97s
585:	learn: 0.4929992	total: 5.6s	remaining: 3.96s
586:	learn: 0.4927738	total: 5.61s	remaining: 3.94s
587:	learn: 0.4925608	total: 5.61s	remaining: 3.93s
588:	learn: 0.4921907	total: 5.62s	remaining: 3.92s
589:	learn: 0.4920263	total: 5.62s	remaining: 3.91s
590:	learn: 0.4916780	total: 5.63s	remaining: 3.89s
591:	learn: 0.4914282	total: 5.63s	remaining: 3.88s
592:	learn: 0.4911084	total: 5.64s	remaining: 3.87s
593:	learn: 0.4907496	total: 5.64s	remaining: 3.86s
594:	learn: 0.4905323	total: 5.65s	remaining: 3.84s
595:	learn: 0.4902070	total: 5.65s	remaining: 3.83s
596:	learn: 0.4899515	total: 5.66s	remaining: 3.82s
597:	learn: 0.4896596	total: 5.66s	remaining: 3.81s
598:	learn: 0.4892382	total: 5.67s	remaining: 3.79s
599:	learn: 0.4889858	total: 5.67s	remaining: 3.78s
600:	learn: 0.4886397	total: 5.68s	remaining: 3.77s
601:	learn: 0.488

Learning rate set to 0.088986
0:	learn: 1.0515571	total: 73.4ms	remaining: 1m 13s
1:	learn: 1.0199807	total: 90ms	remaining: 44.9s
2:	learn: 0.9909882	total: 104ms	remaining: 34.5s
3:	learn: 0.9653798	total: 112ms	remaining: 27.9s
4:	learn: 0.9439251	total: 120ms	remaining: 24s
5:	learn: 0.9252436	total: 127ms	remaining: 21s
6:	learn: 0.9094170	total: 133ms	remaining: 18.8s
7:	learn: 0.8948524	total: 138ms	remaining: 17.1s
8:	learn: 0.8753703	total: 144ms	remaining: 15.9s
9:	learn: 0.8600483	total: 150ms	remaining: 14.9s
10:	learn: 0.8456822	total: 156ms	remaining: 14.1s
11:	learn: 0.8367091	total: 163ms	remaining: 13.4s
12:	learn: 0.8275259	total: 168ms	remaining: 12.7s
13:	learn: 0.8167754	total: 173ms	remaining: 12.2s
14:	learn: 0.8090805	total: 178ms	remaining: 11.7s
15:	learn: 0.8031683	total: 183ms	remaining: 11.3s
16:	learn: 0.7947579	total: 189ms	remaining: 10.9s
17:	learn: 0.7881458	total: 219ms	remaining: 11.9s
18:	learn: 0.7819172	total: 226ms	remaining: 11.6s
19:	learn: 0.7

175:	learn: 0.6386971	total: 1.23s	remaining: 5.74s
176:	learn: 0.6383307	total: 1.23s	remaining: 5.73s
177:	learn: 0.6377072	total: 1.24s	remaining: 5.74s
178:	learn: 0.6370257	total: 1.25s	remaining: 5.74s
179:	learn: 0.6365743	total: 1.26s	remaining: 5.73s
180:	learn: 0.6360753	total: 1.26s	remaining: 5.72s
181:	learn: 0.6355732	total: 1.27s	remaining: 5.71s
182:	learn: 0.6351603	total: 1.28s	remaining: 5.7s
183:	learn: 0.6348917	total: 1.28s	remaining: 5.69s
184:	learn: 0.6346088	total: 1.29s	remaining: 5.68s
185:	learn: 0.6342908	total: 1.29s	remaining: 5.67s
186:	learn: 0.6337611	total: 1.3s	remaining: 5.65s
187:	learn: 0.6332463	total: 1.31s	remaining: 5.64s
188:	learn: 0.6328483	total: 1.31s	remaining: 5.63s
189:	learn: 0.6324184	total: 1.32s	remaining: 5.62s
190:	learn: 0.6320116	total: 1.32s	remaining: 5.61s
191:	learn: 0.6316120	total: 1.33s	remaining: 5.59s
192:	learn: 0.6309224	total: 1.33s	remaining: 5.58s
193:	learn: 0.6304401	total: 1.34s	remaining: 5.57s
194:	learn: 0.

351:	learn: 0.5772534	total: 2.43s	remaining: 4.48s
352:	learn: 0.5769342	total: 2.44s	remaining: 4.47s
353:	learn: 0.5763053	total: 2.45s	remaining: 4.47s
354:	learn: 0.5760589	total: 2.45s	remaining: 4.46s
355:	learn: 0.5757730	total: 2.46s	remaining: 4.45s
356:	learn: 0.5753823	total: 2.47s	remaining: 4.44s
357:	learn: 0.5751037	total: 2.47s	remaining: 4.43s
358:	learn: 0.5747622	total: 2.48s	remaining: 4.43s
359:	learn: 0.5745181	total: 2.48s	remaining: 4.42s
360:	learn: 0.5742831	total: 2.49s	remaining: 4.41s
361:	learn: 0.5740936	total: 2.5s	remaining: 4.4s
362:	learn: 0.5736113	total: 2.5s	remaining: 4.39s
363:	learn: 0.5734891	total: 2.51s	remaining: 4.38s
364:	learn: 0.5732220	total: 2.52s	remaining: 4.38s
365:	learn: 0.5730192	total: 2.52s	remaining: 4.37s
366:	learn: 0.5726764	total: 2.53s	remaining: 4.36s
367:	learn: 0.5723994	total: 2.53s	remaining: 4.35s
368:	learn: 0.5720878	total: 2.54s	remaining: 4.34s
369:	learn: 0.5718190	total: 2.54s	remaining: 4.33s
370:	learn: 0.5

525:	learn: 0.5315788	total: 3.64s	remaining: 3.28s
526:	learn: 0.5313154	total: 3.64s	remaining: 3.27s
527:	learn: 0.5310372	total: 3.65s	remaining: 3.26s
528:	learn: 0.5306575	total: 3.66s	remaining: 3.26s
529:	learn: 0.5304728	total: 3.67s	remaining: 3.25s
530:	learn: 0.5302479	total: 3.67s	remaining: 3.24s
531:	learn: 0.5300469	total: 3.68s	remaining: 3.23s
532:	learn: 0.5298490	total: 3.68s	remaining: 3.23s
533:	learn: 0.5296843	total: 3.69s	remaining: 3.22s
534:	learn: 0.5293364	total: 3.69s	remaining: 3.21s
535:	learn: 0.5290924	total: 3.7s	remaining: 3.2s
536:	learn: 0.5288651	total: 3.71s	remaining: 3.19s
537:	learn: 0.5286452	total: 3.71s	remaining: 3.19s
538:	learn: 0.5283755	total: 3.72s	remaining: 3.18s
539:	learn: 0.5281910	total: 3.72s	remaining: 3.17s
540:	learn: 0.5278854	total: 3.73s	remaining: 3.16s
541:	learn: 0.5276390	total: 3.73s	remaining: 3.15s
542:	learn: 0.5272518	total: 3.74s	remaining: 3.15s
543:	learn: 0.5271526	total: 3.75s	remaining: 3.14s
544:	learn: 0.

692:	learn: 0.4940955	total: 4.87s	remaining: 2.16s
693:	learn: 0.4939507	total: 4.88s	remaining: 2.15s
694:	learn: 0.4936450	total: 4.88s	remaining: 2.14s
695:	learn: 0.4933228	total: 4.89s	remaining: 2.13s
696:	learn: 0.4932450	total: 4.89s	remaining: 2.13s
697:	learn: 0.4930796	total: 4.9s	remaining: 2.12s
698:	learn: 0.4928105	total: 4.91s	remaining: 2.11s
699:	learn: 0.4926708	total: 4.92s	remaining: 2.11s
700:	learn: 0.4924124	total: 4.92s	remaining: 2.1s
701:	learn: 0.4922275	total: 4.93s	remaining: 2.09s
702:	learn: 0.4920236	total: 4.94s	remaining: 2.09s
703:	learn: 0.4918316	total: 4.95s	remaining: 2.08s
704:	learn: 0.4915134	total: 4.96s	remaining: 2.07s
705:	learn: 0.4912224	total: 4.97s	remaining: 2.07s
706:	learn: 0.4909534	total: 4.97s	remaining: 2.06s
707:	learn: 0.4907833	total: 4.99s	remaining: 2.06s
708:	learn: 0.4905621	total: 4.99s	remaining: 2.05s
709:	learn: 0.4902979	total: 5s	remaining: 2.04s
710:	learn: 0.4899963	total: 5.01s	remaining: 2.04s
711:	learn: 0.489

880:	learn: 0.4586130	total: 6.27s	remaining: 847ms
881:	learn: 0.4584981	total: 6.28s	remaining: 840ms
882:	learn: 0.4582727	total: 6.28s	remaining: 833ms
883:	learn: 0.4581486	total: 6.29s	remaining: 826ms
884:	learn: 0.4578933	total: 6.3s	remaining: 818ms
885:	learn: 0.4577679	total: 6.3s	remaining: 811ms
886:	learn: 0.4576561	total: 6.31s	remaining: 804ms
887:	learn: 0.4574247	total: 6.31s	remaining: 796ms
888:	learn: 0.4572689	total: 6.32s	remaining: 789ms
889:	learn: 0.4570972	total: 6.33s	remaining: 782ms
890:	learn: 0.4569975	total: 6.33s	remaining: 775ms
891:	learn: 0.4567926	total: 6.34s	remaining: 767ms
892:	learn: 0.4565676	total: 6.34s	remaining: 760ms
893:	learn: 0.4563555	total: 6.35s	remaining: 753ms
894:	learn: 0.4562546	total: 6.35s	remaining: 745ms
895:	learn: 0.4561265	total: 6.36s	remaining: 738ms
896:	learn: 0.4559936	total: 6.36s	remaining: 731ms
897:	learn: 0.4557982	total: 6.37s	remaining: 724ms
898:	learn: 0.4556403	total: 6.38s	remaining: 716ms
899:	learn: 0.

In [13]:
%%time

# Use model with same parameters as XGBoost - as far as possible
catclf = catboost.CatBoostClassifier(n_estimators=100,
                                     max_depth=20,
                                     learning_rate=0.1,
                                     subsample=0.8,
                                     colsample_bylevel=0.8,  #colsample_bytree=0.8,
                                     random_state=42)

model, cv_results = hyperparameter_optimization(model=lgbm_2,
                                                train_data=train_data[:10000],
                                                train_labels=y_train[:10000],
                                                scoring=["accuracy", "matthews_corrcoef"])


CV Training ACC: 0.8178 +/- 0.0026 
CV Test ACC: 0.6839 +/- 0.0121

CV Training MCC: 0.6645 +/- 0.0049 
CV Test MCC: 0.4026 +/- 0.0223

CPU times: user 1.45 s, sys: 32.5 ms, total: 1.49 s
Wall time: 2.26 s
