In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

import xgboost

# Global seaborn styling: white-grid theme first, then the "Paired" colour palette.
sns.set_theme(style="whitegrid")
sns.set_palette("Paired")

# Seed constant for the notebook (intended for random_state arguments).
SEED: int = 42

In [2]:
# Features from the processed training CSV, indexed by building_id.
train_data = pd.read_csv(
    "../../data/processed/train_data_cleaned.csv",
    index_col="building_id",
)

# Labels: only building_id (used as index) and damage_grade are read.
y_train = pd.read_csv(
    "../../data/processed/train_labels.csv",
    usecols=["building_id", "damage_grade"],
    index_col="building_id",
)

In [3]:
# Fit the encoder on the damage grades, then map them to consecutive integer codes.
encoder = LabelEncoder().fit(y_train["damage_grade"].to_numpy())
train_labels = encoder.transform(y_train["damage_grade"].to_numpy())

In [4]:
def hyperparameter_optimization(model=None, hyperparameter_grid=None, train_data=None, train_labels=None, scoring=None):
    """Cross-validate a classifier, refit it on all training data and print a CV summary.

    Parameters
    ----------
    model : str or estimator
        Either the name of a preconfigured model ("Dummy", "RandomForest",
        "DecisionTree", "XGBoost") or an estimator instance that is used as-is.
    hyperparameter_grid : optional
        Currently unused; kept so existing callers keep working.
    train_data : array-like
        Feature matrix handed to ``cross_validate`` and ``fit``.
    train_labels : DataFrame-like
        Must provide a "damage_grade" column holding the target.
    scoring : optional
        Forwarded unchanged to ``cross_validate``.

    Returns
    -------
    tuple
        ``(fitted_model, cv_results)``: the estimator fitted on the full
        training data and the dict returned by ``cross_validate``.

    Raises
    ------
    ValueError
        If ``model``, ``train_data`` or ``train_labels`` is missing, or if
        ``model`` is a string that names no preconfigured model.
    """
    if model is None:
        raise ValueError("model must be a model name or an estimator instance, got None")
    if train_data is None or train_labels is None:
        raise ValueError("train_data and train_labels are both required")

    # Strings select a preconfigured model; anything else is treated as an estimator.
    estimator = _build_estimator(model) if isinstance(model, str) else model

    # Get train labels in the shape that .fit() expects (1-D array).
    labels = train_labels["damage_grade"].to_numpy()
    if isinstance(estimator, xgboost.XGBClassifier):
        # XGBoost expects [0, 1, 2] instead of [1, 2, 3]; this now also covers
        # XGBClassifier instances passed in directly, not only the "XGBoost" name.
        labels = LabelEncoder().fit_transform(labels)

    cv_results = cross_validate(estimator, train_data, labels, cv=5,
                                scoring=scoring,
                                n_jobs=-1,
                                return_train_score=True)
    # cross_validate fits clones, so fit the returned estimator on all data separately.
    estimator.fit(train_data, labels)

    _print_cv_summary(cv_results)

    return estimator, cv_results


def _build_estimator(name):
    """Instantiate the preconfigured classifier selected by ``name``."""
    if name == "Dummy":
        return DummyClassifier(strategy="most_frequent")
    if name == "RandomForest":
        print("Fitting RandomForest ...")
        return RandomForestClassifier(random_state=SEED)
    if name == "DecisionTree":
        print("Fitting DecisionTree ...")
        return DecisionTreeClassifier(random_state=SEED)
    if name == "XGBoost":
        print("Fitting XGBoost ...")
        return xgboost.XGBClassifier(n_estimators=100,
                                     max_depth=20,
                                     learning_rate=0.1,
                                     subsample=0.8,
                                     colsample_bytree=0.8,
                                     random_state=SEED,
                                     n_jobs=-1)
    raise ValueError(
        f"Unknown model name {name!r}; expected one of "
        "'Dummy', 'RandomForest', 'DecisionTree', 'XGBoost' or an estimator instance"
    )


def _print_cv_summary(cv_results):
    """Print mean +/- std of the train and test scores for every metric in ``cv_results``."""
    # Short labels for the metrics this notebook reports; other metrics are upper-cased.
    short_labels = {"accuracy": "ACC", "matthews_corrcoef": "MCC"}
    # Metrics are discovered from the "test_<metric>" keys, so any scoring setup works.
    metrics = [key[len("test_"):] for key in cv_results if key.startswith("test_")]

    print("")
    for metric in metrics:
        label = short_labels.get(metric, metric.upper())
        train_scores = cv_results[f"train_{metric}"]
        test_scores = cv_results[f"test_{metric}"]
        print(f"CV Training {label}: {round(np.mean(train_scores), 4)} +/- {round(np.std(train_scores), 4)} ")
        print(f"CV Test {label}: {round(np.mean(test_scores), 4)} +/- {round(np.std(test_scores), 4)}")
        print("")

# Baseline (XGBoost)

In [5]:
%%time

model, cv_results = hyperparameter_optimization(model="XGBoost",
                                                train_data=train_data,
                                                train_labels=y_train,
                                                scoring=["accuracy", "matthews_corrcoef"])

Fitting XGBoost ...

CV Training ACC: 0.949 +/- 0.0004 
CV Test ACC: 0.7375 +/- 0.0025

CV Training MCC: 0.9077 +/- 0.0006 
CV Test MCC: 0.5125 +/- 0.0047

CPU times: user 17min 48s, sys: 20.5 s, total: 18min 8s
Wall time: 15min 30s


# LGBM

In [7]:
import lightgbm

In [8]:
%%time

# Use model with default parameters
lgbm = lightgbm.LGBMClassifier()

model, cv_results = hyperparameter_optimization(model=lgbm,
                                                train_data=train_data,
                                                train_labels=y_train,
                                                scoring=["accuracy", "matthews_corrcoef"])


CV Training ACC: 0.7165 +/- 0.0006 
CV Test ACC: 0.7104 +/- 0.0017

CV Training MCC: 0.4654 +/- 0.0012 
CV Test MCC: 0.4533 +/- 0.0033

CPU times: user 22.3 s, sys: 264 ms, total: 22.6 s
Wall time: 25.2 s


In [9]:
%%time

# Use model with same parameters as XGBoost - as far as possible
lgbm_2 = lightgbm.LGBMClassifier(n_estimators=100,
                                 max_depth=20,
                                 learning_rate=0.1,
                                 subsample=0.8,
                                 colsample_bytree=0.8,
                                 random_state=42,
                                 n_jobs=-1)

model, cv_results = hyperparameter_optimization(model=lgbm_2,
                                                train_data=train_data,
                                                train_labels=y_train,
                                                scoring=["accuracy", "matthews_corrcoef"])


CV Training ACC: 0.7167 +/- 0.0007 
CV Test ACC: 0.7108 +/- 0.0021

CV Training MCC: 0.4657 +/- 0.0013 
CV Test MCC: 0.4541 +/- 0.0041

CPU times: user 26 s, sys: 91 ms, total: 26.1 s
Wall time: 31.9 s


# CatBoost

In [10]:
import catboost

In [11]:
%%time

# Use model with default parameters
catclf = catboost.CatBoostClassifier()

model, cv_results = hyperparameter_optimization(model=catclf,
                                                train_data=train_data,
                                                train_labels=y_train,
                                                scoring=["accuracy", "matthews_corrcoef"])

Learning rate set to 0.103895
0:	learn: 1.0428325	total: 228ms	remaining: 3m 48s
1:	learn: 0.9983696	total: 302ms	remaining: 2m 30s
2:	learn: 0.9624112	total: 353ms	remaining: 1m 57s
3:	learn: 0.9342866	total: 406ms	remaining: 1m 41s
4:	learn: 0.9072987	total: 473ms	remaining: 1m 34s
5:	learn: 0.8844244	total: 590ms	remaining: 1m 37s
6:	learn: 0.8662016	total: 784ms	remaining: 1m 51s
7:	learn: 0.8494598	total: 1.09s	remaining: 2m 14s
8:	learn: 0.8347882	total: 1.32s	remaining: 2m 24s
9:	learn: 0.8221617	total: 1.65s	remaining: 2m 43s
10:	learn: 0.8113943	total: 1.84s	remaining: 2m 45s
11:	learn: 0.8024083	total: 2.08s	remaining: 2m 51s
12:	learn: 0.7952199	total: 2.42s	remaining: 3m 3s
13:	learn: 0.7867845	total: 2.63s	remaining: 3m 5s
14:	learn: 0.7786181	total: 2.93s	remaining: 3m 12s
15:	learn: 0.7731026	total: 3.08s	remaining: 3m 9s
16:	learn: 0.7687498	total: 3.24s	remaining: 3m 7s
17:	learn: 0.7631683	total: 3.46s	remaining: 3m 9s
18:	learn: 0.7583754	total: 3.77s	remaining: 3m 1

Learning rate set to 0.103895
0:	learn: 1.0427994	total: 320ms	remaining: 5m 19s
1:	learn: 0.9982915	total: 572ms	remaining: 4m 45s
2:	learn: 0.9622960	total: 887ms	remaining: 4m 54s
3:	learn: 0.9342141	total: 1.2s	remaining: 4m 58s
4:	learn: 0.9072424	total: 1.47s	remaining: 4m 52s
5:	learn: 0.8844448	total: 1.72s	remaining: 4m 44s
6:	learn: 0.8662858	total: 1.98s	remaining: 4m 40s
7:	learn: 0.8495124	total: 2.22s	remaining: 4m 34s
8:	learn: 0.8349031	total: 2.41s	remaining: 4m 25s
9:	learn: 0.8237292	total: 2.75s	remaining: 4m 32s
10:	learn: 0.8141959	total: 3.13s	remaining: 4m 41s
11:	learn: 0.8052738	total: 3.47s	remaining: 4m 46s
12:	learn: 0.7972964	total: 3.7s	remaining: 4m 41s
13:	learn: 0.7892322	total: 4.07s	remaining: 4m 46s
14:	learn: 0.7816704	total: 4.45s	remaining: 4m 52s
15:	learn: 0.7758567	total: 4.66s	remaining: 4m 46s
16:	learn: 0.7715719	total: 4.84s	remaining: 4m 39s
17:	learn: 0.7655260	total: 5.14s	remaining: 4m 40s
18:	learn: 0.7609365	total: 5.38s	remaining: 4

156:	learn: 0.6698369	total: 49.8s	remaining: 4m 27s
157:	learn: 0.6696083	total: 50.1s	remaining: 4m 27s
158:	learn: 0.6693361	total: 50.4s	remaining: 4m 26s
159:	learn: 0.6691622	total: 50.9s	remaining: 4m 27s
160:	learn: 0.6689421	total: 51.1s	remaining: 4m 26s
161:	learn: 0.6687241	total: 51.4s	remaining: 4m 25s
162:	learn: 0.6685509	total: 51.6s	remaining: 4m 25s
163:	learn: 0.6683388	total: 51.9s	remaining: 4m 24s
164:	learn: 0.6681373	total: 52.1s	remaining: 4m 23s
165:	learn: 0.6679768	total: 52.3s	remaining: 4m 22s
166:	learn: 0.6678106	total: 52.6s	remaining: 4m 22s
167:	learn: 0.6675696	total: 52.9s	remaining: 4m 22s
168:	learn: 0.6674161	total: 53.2s	remaining: 4m 21s
169:	learn: 0.6672287	total: 53.5s	remaining: 4m 21s
170:	learn: 0.6670506	total: 53.8s	remaining: 4m 20s
171:	learn: 0.6668140	total: 54.1s	remaining: 4m 20s
172:	learn: 0.6662359	total: 54.5s	remaining: 4m 20s
173:	learn: 0.6661173	total: 54.9s	remaining: 4m 20s
174:	learn: 0.6659279	total: 55.3s	remaining: 

156:	learn: 0.6700191	total: 50.8s	remaining: 4m 32s
157:	learn: 0.6696946	total: 51.2s	remaining: 4m 32s
158:	learn: 0.6694651	total: 51.5s	remaining: 4m 32s
159:	learn: 0.6693004	total: 51.8s	remaining: 4m 31s
160:	learn: 0.6690505	total: 52s	remaining: 4m 31s
161:	learn: 0.6688283	total: 52.3s	remaining: 4m 30s
162:	learn: 0.6684468	total: 52.6s	remaining: 4m 30s
163:	learn: 0.6682162	total: 53s	remaining: 4m 29s
164:	learn: 0.6680622	total: 53.2s	remaining: 4m 29s
165:	learn: 0.6678945	total: 53.5s	remaining: 4m 28s
166:	learn: 0.6676152	total: 53.8s	remaining: 4m 28s
167:	learn: 0.6674521	total: 54.1s	remaining: 4m 27s
168:	learn: 0.6673022	total: 54.5s	remaining: 4m 27s
169:	learn: 0.6670735	total: 54.7s	remaining: 4m 27s
170:	learn: 0.6669476	total: 55s	remaining: 4m 26s
171:	learn: 0.6668005	total: 55.5s	remaining: 4m 27s
172:	learn: 0.6665134	total: 55.8s	remaining: 4m 26s
173:	learn: 0.6663653	total: 56.2s	remaining: 4m 26s
174:	learn: 0.6660901	total: 56.6s	remaining: 4m 26s

309:	learn: 0.6449983	total: 1m 42s	remaining: 3m 48s
310:	learn: 0.6449365	total: 1m 43s	remaining: 3m 49s
311:	learn: 0.6448782	total: 1m 43s	remaining: 3m 48s
312:	learn: 0.6447026	total: 1m 43s	remaining: 3m 47s
313:	learn: 0.6446517	total: 1m 44s	remaining: 3m 47s
314:	learn: 0.6444269	total: 1m 44s	remaining: 3m 47s
315:	learn: 0.6443411	total: 1m 45s	remaining: 3m 47s
316:	learn: 0.6441531	total: 1m 45s	remaining: 3m 47s
317:	learn: 0.6439961	total: 1m 45s	remaining: 3m 47s
318:	learn: 0.6438784	total: 1m 46s	remaining: 3m 46s
319:	learn: 0.6438075	total: 1m 46s	remaining: 3m 46s
320:	learn: 0.6436350	total: 1m 46s	remaining: 3m 45s
321:	learn: 0.6435306	total: 1m 47s	remaining: 3m 45s
322:	learn: 0.6434078	total: 1m 47s	remaining: 3m 45s
323:	learn: 0.6433395	total: 1m 48s	remaining: 3m 45s
324:	learn: 0.6432164	total: 1m 48s	remaining: 3m 45s
325:	learn: 0.6430839	total: 1m 48s	remaining: 3m 44s
326:	learn: 0.6430276	total: 1m 48s	remaining: 3m 43s
327:	learn: 0.6429336	total:

310:	learn: 0.6467958	total: 1m 46s	remaining: 3m 56s
311:	learn: 0.6467313	total: 1m 47s	remaining: 3m 56s
312:	learn: 0.6466768	total: 1m 47s	remaining: 3m 56s
313:	learn: 0.6465906	total: 1m 48s	remaining: 3m 56s
314:	learn: 0.6464715	total: 1m 48s	remaining: 3m 55s
315:	learn: 0.6464146	total: 1m 48s	remaining: 3m 55s
316:	learn: 0.6462627	total: 1m 49s	remaining: 3m 55s
317:	learn: 0.6461955	total: 1m 49s	remaining: 3m 54s
318:	learn: 0.6461088	total: 1m 49s	remaining: 3m 54s
319:	learn: 0.6460360	total: 1m 50s	remaining: 3m 53s
320:	learn: 0.6459004	total: 1m 50s	remaining: 3m 53s
321:	learn: 0.6458177	total: 1m 50s	remaining: 3m 53s
322:	learn: 0.6456916	total: 1m 51s	remaining: 3m 53s
323:	learn: 0.6456201	total: 1m 51s	remaining: 3m 52s
324:	learn: 0.6453986	total: 1m 52s	remaining: 3m 52s
325:	learn: 0.6452864	total: 1m 52s	remaining: 3m 52s
326:	learn: 0.6451389	total: 1m 52s	remaining: 3m 51s
327:	learn: 0.6449892	total: 1m 52s	remaining: 3m 51s
328:	learn: 0.6449175	total:

310:	learn: 0.6455434	total: 1m 46s	remaining: 3m 56s
311:	learn: 0.6454703	total: 1m 46s	remaining: 3m 55s
312:	learn: 0.6453083	total: 1m 47s	remaining: 3m 55s
313:	learn: 0.6451696	total: 1m 47s	remaining: 3m 55s
314:	learn: 0.6451089	total: 1m 48s	remaining: 3m 55s
315:	learn: 0.6449642	total: 1m 48s	remaining: 3m 54s
316:	learn: 0.6448796	total: 1m 48s	remaining: 3m 54s
317:	learn: 0.6447153	total: 1m 49s	remaining: 3m 54s
318:	learn: 0.6446125	total: 1m 49s	remaining: 3m 54s
319:	learn: 0.6445148	total: 1m 49s	remaining: 3m 53s
320:	learn: 0.6443802	total: 1m 50s	remaining: 3m 53s
321:	learn: 0.6442517	total: 1m 50s	remaining: 3m 52s
322:	learn: 0.6441538	total: 1m 51s	remaining: 3m 52s
323:	learn: 0.6440795	total: 1m 51s	remaining: 3m 52s
324:	learn: 0.6439871	total: 1m 51s	remaining: 3m 52s
325:	learn: 0.6439342	total: 1m 52s	remaining: 3m 52s
326:	learn: 0.6438312	total: 1m 52s	remaining: 3m 51s
327:	learn: 0.6437369	total: 1m 52s	remaining: 3m 51s
328:	learn: 0.6436613	total:

462:	learn: 0.6309640	total: 2m 39s	remaining: 3m 4s
463:	learn: 0.6308787	total: 2m 39s	remaining: 3m 4s
464:	learn: 0.6308207	total: 2m 39s	remaining: 3m 3s
465:	learn: 0.6306883	total: 2m 40s	remaining: 3m 3s
466:	learn: 0.6305820	total: 2m 40s	remaining: 3m 3s
467:	learn: 0.6305382	total: 2m 40s	remaining: 3m 2s
468:	learn: 0.6304726	total: 2m 41s	remaining: 3m 2s
469:	learn: 0.6304414	total: 2m 41s	remaining: 3m 2s
470:	learn: 0.6303205	total: 2m 41s	remaining: 3m 1s
471:	learn: 0.6302346	total: 2m 42s	remaining: 3m 1s
472:	learn: 0.6301635	total: 2m 42s	remaining: 3m
473:	learn: 0.6301052	total: 2m 42s	remaining: 3m
474:	learn: 0.6299900	total: 2m 43s	remaining: 3m
475:	learn: 0.6298875	total: 2m 43s	remaining: 2m 59s
476:	learn: 0.6296811	total: 2m 43s	remaining: 2m 59s
477:	learn: 0.6295732	total: 2m 43s	remaining: 2m 59s
478:	learn: 0.6295146	total: 2m 44s	remaining: 2m 58s
479:	learn: 0.6294186	total: 2m 44s	remaining: 2m 58s
480:	learn: 0.6293410	total: 2m 44s	remaining: 2m 

462:	learn: 0.6302638	total: 2m 39s	remaining: 3m 5s
463:	learn: 0.6301541	total: 2m 40s	remaining: 3m 5s
464:	learn: 0.6300793	total: 2m 40s	remaining: 3m 4s
465:	learn: 0.6300034	total: 2m 41s	remaining: 3m 4s
466:	learn: 0.6299450	total: 2m 41s	remaining: 3m 4s
467:	learn: 0.6298641	total: 2m 41s	remaining: 3m 4s
468:	learn: 0.6298126	total: 2m 42s	remaining: 3m 3s
469:	learn: 0.6297676	total: 2m 42s	remaining: 3m 3s
470:	learn: 0.6296776	total: 2m 42s	remaining: 3m 2s
471:	learn: 0.6296155	total: 2m 43s	remaining: 3m 2s
472:	learn: 0.6295071	total: 2m 43s	remaining: 3m 2s
473:	learn: 0.6294280	total: 2m 43s	remaining: 3m 1s
474:	learn: 0.6293778	total: 2m 44s	remaining: 3m 1s
475:	learn: 0.6292720	total: 2m 44s	remaining: 3m 1s
476:	learn: 0.6291145	total: 2m 44s	remaining: 3m
477:	learn: 0.6290123	total: 2m 45s	remaining: 3m
478:	learn: 0.6289180	total: 2m 45s	remaining: 3m
479:	learn: 0.6287542	total: 2m 45s	remaining: 2m 59s
480:	learn: 0.6286978	total: 2m 46s	remaining: 2m 59s


614:	learn: 0.6178876	total: 3m 30s	remaining: 2m 12s
615:	learn: 0.6178084	total: 3m 31s	remaining: 2m 11s
616:	learn: 0.6177311	total: 3m 31s	remaining: 2m 11s
617:	learn: 0.6176899	total: 3m 32s	remaining: 2m 11s
618:	learn: 0.6176215	total: 3m 32s	remaining: 2m 10s
619:	learn: 0.6175395	total: 3m 32s	remaining: 2m 10s
620:	learn: 0.6174957	total: 3m 33s	remaining: 2m 10s
621:	learn: 0.6174502	total: 3m 33s	remaining: 2m 9s
622:	learn: 0.6173404	total: 3m 34s	remaining: 2m 9s
623:	learn: 0.6172843	total: 3m 34s	remaining: 2m 9s
624:	learn: 0.6171842	total: 3m 34s	remaining: 2m 8s
625:	learn: 0.6171238	total: 3m 35s	remaining: 2m 8s
626:	learn: 0.6170602	total: 3m 35s	remaining: 2m 8s
627:	learn: 0.6170099	total: 3m 35s	remaining: 2m 7s
628:	learn: 0.6169589	total: 3m 36s	remaining: 2m 7s
629:	learn: 0.6168934	total: 3m 36s	remaining: 2m 7s
630:	learn: 0.6168241	total: 3m 36s	remaining: 2m 6s
631:	learn: 0.6167950	total: 3m 37s	remaining: 2m 6s
632:	learn: 0.6167655	total: 3m 37s	rem

614:	learn: 0.6185909	total: 3m 34s	remaining: 2m 14s
615:	learn: 0.6185309	total: 3m 34s	remaining: 2m 13s
616:	learn: 0.6184214	total: 3m 34s	remaining: 2m 13s
617:	learn: 0.6183723	total: 3m 35s	remaining: 2m 12s
618:	learn: 0.6183166	total: 3m 35s	remaining: 2m 12s
619:	learn: 0.6182698	total: 3m 35s	remaining: 2m 12s
620:	learn: 0.6182238	total: 3m 36s	remaining: 2m 11s
621:	learn: 0.6181223	total: 3m 36s	remaining: 2m 11s
622:	learn: 0.6180813	total: 3m 36s	remaining: 2m 11s
623:	learn: 0.6180341	total: 3m 36s	remaining: 2m 10s
624:	learn: 0.6179900	total: 3m 37s	remaining: 2m 10s
625:	learn: 0.6179416	total: 3m 37s	remaining: 2m 9s
626:	learn: 0.6178868	total: 3m 37s	remaining: 2m 9s
627:	learn: 0.6178341	total: 3m 37s	remaining: 2m 9s
628:	learn: 0.6177765	total: 3m 38s	remaining: 2m 8s
629:	learn: 0.6177219	total: 3m 38s	remaining: 2m 8s
630:	learn: 0.6176849	total: 3m 39s	remaining: 2m 8s
631:	learn: 0.6176407	total: 3m 39s	remaining: 2m 7s
632:	learn: 0.6175906	total: 3m 39s

767:	learn: 0.6086323	total: 4m 25s	remaining: 1m 20s
768:	learn: 0.6085824	total: 4m 25s	remaining: 1m 19s
769:	learn: 0.6085639	total: 4m 26s	remaining: 1m 19s
770:	learn: 0.6085025	total: 4m 26s	remaining: 1m 19s
771:	learn: 0.6083866	total: 4m 26s	remaining: 1m 18s
772:	learn: 0.6083185	total: 4m 27s	remaining: 1m 18s
773:	learn: 0.6082662	total: 4m 27s	remaining: 1m 18s
774:	learn: 0.6082263	total: 4m 27s	remaining: 1m 17s
775:	learn: 0.6081651	total: 4m 28s	remaining: 1m 17s
776:	learn: 0.6081303	total: 4m 28s	remaining: 1m 16s
777:	learn: 0.6081056	total: 4m 28s	remaining: 1m 16s
778:	learn: 0.6080036	total: 4m 29s	remaining: 1m 16s
779:	learn: 0.6079725	total: 4m 29s	remaining: 1m 16s
780:	learn: 0.6079037	total: 4m 29s	remaining: 1m 15s
781:	learn: 0.6078359	total: 4m 30s	remaining: 1m 15s
782:	learn: 0.6078026	total: 4m 30s	remaining: 1m 14s
783:	learn: 0.6077593	total: 4m 30s	remaining: 1m 14s
784:	learn: 0.6076578	total: 4m 31s	remaining: 1m 14s
785:	learn: 0.6076145	total:

767:	learn: 0.6105239	total: 4m 25s	remaining: 1m 20s
768:	learn: 0.6104783	total: 4m 26s	remaining: 1m 19s
769:	learn: 0.6104380	total: 4m 26s	remaining: 1m 19s
770:	learn: 0.6104120	total: 4m 26s	remaining: 1m 19s
771:	learn: 0.6103546	total: 4m 27s	remaining: 1m 18s
772:	learn: 0.6103333	total: 4m 27s	remaining: 1m 18s
773:	learn: 0.6102920	total: 4m 27s	remaining: 1m 18s
774:	learn: 0.6102429	total: 4m 28s	remaining: 1m 17s
775:	learn: 0.6101829	total: 4m 28s	remaining: 1m 17s
776:	learn: 0.6101289	total: 4m 29s	remaining: 1m 17s
777:	learn: 0.6100439	total: 4m 29s	remaining: 1m 16s
778:	learn: 0.6100100	total: 4m 29s	remaining: 1m 16s
779:	learn: 0.6099204	total: 4m 30s	remaining: 1m 16s
780:	learn: 0.6098550	total: 4m 30s	remaining: 1m 15s
781:	learn: 0.6097492	total: 4m 30s	remaining: 1m 15s
782:	learn: 0.6096923	total: 4m 31s	remaining: 1m 15s
783:	learn: 0.6095604	total: 4m 31s	remaining: 1m 14s
784:	learn: 0.6095014	total: 4m 32s	remaining: 1m 14s
785:	learn: 0.6094602	total:

767:	learn: 0.6089575	total: 4m 27s	remaining: 1m 20s
768:	learn: 0.6089022	total: 4m 28s	remaining: 1m 20s
769:	learn: 0.6088513	total: 4m 28s	remaining: 1m 20s
770:	learn: 0.6088019	total: 4m 28s	remaining: 1m 19s
771:	learn: 0.6087379	total: 4m 29s	remaining: 1m 19s
772:	learn: 0.6086580	total: 4m 29s	remaining: 1m 19s
773:	learn: 0.6086259	total: 4m 29s	remaining: 1m 18s
774:	learn: 0.6085886	total: 4m 30s	remaining: 1m 18s
775:	learn: 0.6085511	total: 4m 30s	remaining: 1m 18s
776:	learn: 0.6085028	total: 4m 30s	remaining: 1m 17s
777:	learn: 0.6084302	total: 4m 31s	remaining: 1m 17s
778:	learn: 0.6083821	total: 4m 31s	remaining: 1m 17s
779:	learn: 0.6083607	total: 4m 32s	remaining: 1m 16s
780:	learn: 0.6082979	total: 4m 32s	remaining: 1m 16s
781:	learn: 0.6082304	total: 4m 32s	remaining: 1m 16s
782:	learn: 0.6081904	total: 4m 33s	remaining: 1m 15s
783:	learn: 0.6081377	total: 4m 33s	remaining: 1m 15s
784:	learn: 0.6080502	total: 4m 34s	remaining: 1m 15s
785:	learn: 0.6079585	total:

2:	learn: 0.9611774	total: 230ms	remaining: 1m 16s
3:	learn: 0.9310094	total: 288ms	remaining: 1m 11s
4:	learn: 0.9064896	total: 491ms	remaining: 1m 37s
5:	learn: 0.8834302	total: 593ms	remaining: 1m 38s
6:	learn: 0.8644008	total: 675ms	remaining: 1m 35s
7:	learn: 0.8489989	total: 751ms	remaining: 1m 33s
8:	learn: 0.8345361	total: 802ms	remaining: 1m 28s
9:	learn: 0.8219642	total: 860ms	remaining: 1m 25s
10:	learn: 0.8103393	total: 922ms	remaining: 1m 22s
11:	learn: 0.8002180	total: 1.09s	remaining: 1m 29s
12:	learn: 0.7929971	total: 1.19s	remaining: 1m 30s
13:	learn: 0.7854110	total: 1.28s	remaining: 1m 29s
14:	learn: 0.7780943	total: 1.36s	remaining: 1m 29s
15:	learn: 0.7725918	total: 1.42s	remaining: 1m 27s
16:	learn: 0.7674153	total: 1.6s	remaining: 1m 32s
17:	learn: 0.7630176	total: 1.7s	remaining: 1m 32s
18:	learn: 0.7589237	total: 1.8s	remaining: 1m 33s
19:	learn: 0.7555446	total: 1.86s	remaining: 1m 31s
20:	learn: 0.7506408	total: 1.92s	remaining: 1m 29s
21:	learn: 0.7482352	to

160:	learn: 0.6684814	total: 14.4s	remaining: 1m 15s
161:	learn: 0.6682697	total: 14.5s	remaining: 1m 14s
162:	learn: 0.6681507	total: 14.5s	remaining: 1m 14s
163:	learn: 0.6679075	total: 14.7s	remaining: 1m 14s
164:	learn: 0.6678051	total: 14.8s	remaining: 1m 14s
165:	learn: 0.6675713	total: 14.9s	remaining: 1m 14s
166:	learn: 0.6674597	total: 14.9s	remaining: 1m 14s
167:	learn: 0.6672390	total: 15s	remaining: 1m 14s
168:	learn: 0.6670972	total: 15s	remaining: 1m 13s
169:	learn: 0.6669493	total: 15.1s	remaining: 1m 13s
170:	learn: 0.6666633	total: 15.2s	remaining: 1m 13s
171:	learn: 0.6665195	total: 15.2s	remaining: 1m 13s
172:	learn: 0.6662623	total: 15.3s	remaining: 1m 13s
173:	learn: 0.6661460	total: 15.4s	remaining: 1m 13s
174:	learn: 0.6659723	total: 15.5s	remaining: 1m 12s
175:	learn: 0.6658321	total: 15.5s	remaining: 1m 12s
176:	learn: 0.6656933	total: 15.6s	remaining: 1m 12s
177:	learn: 0.6654154	total: 15.7s	remaining: 1m 12s
178:	learn: 0.6652192	total: 15.8s	remaining: 1m 1

320:	learn: 0.6445407	total: 27.6s	remaining: 58.5s
321:	learn: 0.6444773	total: 27.7s	remaining: 58.3s
322:	learn: 0.6443894	total: 27.7s	remaining: 58.2s
323:	learn: 0.6441858	total: 27.8s	remaining: 58.1s
324:	learn: 0.6440060	total: 27.9s	remaining: 57.9s
325:	learn: 0.6439017	total: 28s	remaining: 58s
326:	learn: 0.6437862	total: 28.2s	remaining: 58s
327:	learn: 0.6436489	total: 28.3s	remaining: 57.9s
328:	learn: 0.6435270	total: 28.3s	remaining: 57.7s
329:	learn: 0.6434185	total: 28.4s	remaining: 57.6s
330:	learn: 0.6432253	total: 28.5s	remaining: 57.5s
331:	learn: 0.6431249	total: 28.5s	remaining: 57.4s
332:	learn: 0.6430605	total: 28.7s	remaining: 57.5s
333:	learn: 0.6428774	total: 28.8s	remaining: 57.4s
334:	learn: 0.6427323	total: 28.9s	remaining: 57.3s
335:	learn: 0.6426454	total: 28.9s	remaining: 57.2s
336:	learn: 0.6425771	total: 29s	remaining: 57s
337:	learn: 0.6424779	total: 29.1s	remaining: 56.9s
338:	learn: 0.6424092	total: 29.1s	remaining: 56.8s
339:	learn: 0.6423249	

480:	learn: 0.6294424	total: 39.2s	remaining: 42.3s
481:	learn: 0.6293993	total: 39.3s	remaining: 42.2s
482:	learn: 0.6292343	total: 39.4s	remaining: 42.2s
483:	learn: 0.6291452	total: 39.5s	remaining: 42.1s
484:	learn: 0.6290730	total: 39.6s	remaining: 42s
485:	learn: 0.6290160	total: 39.6s	remaining: 41.9s
486:	learn: 0.6289200	total: 39.7s	remaining: 41.8s
487:	learn: 0.6288435	total: 39.9s	remaining: 41.8s
488:	learn: 0.6287850	total: 39.9s	remaining: 41.7s
489:	learn: 0.6286937	total: 40s	remaining: 41.7s
490:	learn: 0.6286082	total: 40.1s	remaining: 41.6s
491:	learn: 0.6285473	total: 40.2s	remaining: 41.5s
492:	learn: 0.6284851	total: 40.2s	remaining: 41.4s
493:	learn: 0.6284072	total: 40.3s	remaining: 41.3s
494:	learn: 0.6283133	total: 40.5s	remaining: 41.3s
495:	learn: 0.6282583	total: 40.6s	remaining: 41.2s
496:	learn: 0.6281876	total: 40.6s	remaining: 41.1s
497:	learn: 0.6281397	total: 40.7s	remaining: 41.1s
498:	learn: 0.6280508	total: 40.8s	remaining: 40.9s
499:	learn: 0.62

640:	learn: 0.6173997	total: 51.8s	remaining: 29s
641:	learn: 0.6173643	total: 51.9s	remaining: 28.9s
642:	learn: 0.6173225	total: 52s	remaining: 28.8s
643:	learn: 0.6172839	total: 52s	remaining: 28.8s
644:	learn: 0.6172492	total: 52.1s	remaining: 28.7s
645:	learn: 0.6171358	total: 52.1s	remaining: 28.6s
646:	learn: 0.6170388	total: 52.3s	remaining: 28.5s
647:	learn: 0.6169382	total: 52.4s	remaining: 28.5s
648:	learn: 0.6168666	total: 52.5s	remaining: 28.4s
649:	learn: 0.6168153	total: 52.5s	remaining: 28.3s
650:	learn: 0.6167382	total: 52.6s	remaining: 28.2s
651:	learn: 0.6167034	total: 52.6s	remaining: 28.1s
652:	learn: 0.6166651	total: 52.7s	remaining: 28s
653:	learn: 0.6165836	total: 52.8s	remaining: 27.9s
654:	learn: 0.6165531	total: 52.8s	remaining: 27.8s
655:	learn: 0.6164861	total: 52.9s	remaining: 27.7s
656:	learn: 0.6164397	total: 52.9s	remaining: 27.6s
657:	learn: 0.6163766	total: 53s	remaining: 27.5s
658:	learn: 0.6162906	total: 53.1s	remaining: 27.5s
659:	learn: 0.6162666	

800:	learn: 0.6080614	total: 1m 2s	remaining: 15.6s
801:	learn: 0.6080072	total: 1m 2s	remaining: 15.5s
802:	learn: 0.6079677	total: 1m 2s	remaining: 15.4s
803:	learn: 0.6079018	total: 1m 3s	remaining: 15.4s
804:	learn: 0.6078395	total: 1m 3s	remaining: 15.3s
805:	learn: 0.6077982	total: 1m 3s	remaining: 15.2s
806:	learn: 0.6077596	total: 1m 3s	remaining: 15.1s
807:	learn: 0.6077282	total: 1m 3s	remaining: 15.1s
808:	learn: 0.6076813	total: 1m 3s	remaining: 15s
809:	learn: 0.6076386	total: 1m 3s	remaining: 14.9s
810:	learn: 0.6075626	total: 1m 3s	remaining: 14.8s
811:	learn: 0.6074895	total: 1m 3s	remaining: 14.7s
812:	learn: 0.6074617	total: 1m 3s	remaining: 14.7s
813:	learn: 0.6074287	total: 1m 3s	remaining: 14.6s
814:	learn: 0.6073919	total: 1m 3s	remaining: 14.5s
815:	learn: 0.6073605	total: 1m 3s	remaining: 14.4s
816:	learn: 0.6072947	total: 1m 3s	remaining: 14.3s
817:	learn: 0.6072386	total: 1m 4s	remaining: 14.3s
818:	learn: 0.6071979	total: 1m 4s	remaining: 14.2s
819:	learn: 0.

958:	learn: 0.6005109	total: 1m 14s	remaining: 3.18s
959:	learn: 0.6004466	total: 1m 14s	remaining: 3.11s
960:	learn: 0.6004131	total: 1m 14s	remaining: 3.03s
961:	learn: 0.6003749	total: 1m 14s	remaining: 2.96s
962:	learn: 0.6003476	total: 1m 14s	remaining: 2.88s
963:	learn: 0.6003165	total: 1m 14s	remaining: 2.8s
964:	learn: 0.6002837	total: 1m 15s	remaining: 2.72s
965:	learn: 0.6002382	total: 1m 15s	remaining: 2.64s
966:	learn: 0.6002128	total: 1m 15s	remaining: 2.56s
967:	learn: 0.6001447	total: 1m 15s	remaining: 2.49s
968:	learn: 0.6001139	total: 1m 15s	remaining: 2.41s
969:	learn: 0.6000314	total: 1m 15s	remaining: 2.33s
970:	learn: 0.6000090	total: 1m 15s	remaining: 2.25s
971:	learn: 0.5999520	total: 1m 15s	remaining: 2.17s
972:	learn: 0.5998758	total: 1m 15s	remaining: 2.1s
973:	learn: 0.5998361	total: 1m 15s	remaining: 2.02s
974:	learn: 0.5997737	total: 1m 15s	remaining: 1.94s
975:	learn: 0.5997316	total: 1m 15s	remaining: 1.86s
976:	learn: 0.5996914	total: 1m 15s	remaining: 1

In [12]:
%%time

# Use model with same parameters as XGBoost - as far as possible
catclf = catboost.CatBoostClassifier(n_estimators=100,
                                     max_depth=20,
                                     learning_rate=0.1,
                                     subsample=0.8,
                                     colsample_bylevel=0.8,  #colsample_bytree=0.8,
                                     random_state=42)

model, cv_results = hyperparameter_optimization(model=lgbm_2,
                                                train_data=train_data,
                                                train_labels=y_train,
                                                scoring=["accuracy", "matthews_corrcoef"])


CV Training ACC: 0.7167 +/- 0.0007 
CV Test ACC: 0.7108 +/- 0.0021

CV Training MCC: 0.4657 +/- 0.0013 
CV Test MCC: 0.4541 +/- 0.0041

CPU times: user 29.6 s, sys: 192 ms, total: 29.8 s
Wall time: 32.3 s
