# Bayesian Optimization & Supervised Machine Learning

## Overview
1. Load sonar all data
2. Prepare data
3. Split data into training and testing datasets
4. Encode data for Xgboost (matrix)
5. Train ML models
6. Evaluate ML performance
7. Confusion Matrix
8. Save Model
9. Load Model
10. Predict on new data

In [35]:
# Import Libraries.

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold

# Metrics
from sklearn.metrics import roc_auc_score
from sklearn.metrics import make_scorer

from time import time

# Bayesian Optimization -bayes_opt
#from bayes_opt import BayesianOptimization
#from bayes_opt.util import Colours

# # Bayesian Optimization - skopt
from skopt import BayesSearchCV
from skopt.callbacks import DeadlineStopper, VerboseCallback, DeltaXStopper
from skopt.space import Real, Categorical, Integer

# Catboost
from catboost import CatBoostClassifier, Pool
import ipywidgets # Nødvendig for plot


# Model selection
# Reference: https://www.kaggle.com/shivampanwar/catboost-and-hyperparameter-tuning-using-bayes


# Udeståender:
# Omkode skopt eksempel, så det anvender bayes_opt eller omvendt.
# log-uniform virker ikke med search space.
# Lave RoC-Curve
# Confusion Matrix
# Learning Curve
# Div. andre evaluation methods

# Save Model
# load model
# Predict "nye data"

### Load the sonar binary classification dataset.

In [36]:
# Fetch the sonar dataset (a CSV without a header row) from the author's
# GitHub repository.
from pandas import read_csv

url = 'https://raw.githubusercontent.com/kurtholst/databricks_proj/master/sonar.all-data.csv'
# header=None: no row is treated as column names, so the columns are
# addressed by integer position (0..60) in the cells below.
dataset = read_csv(filepath_or_buffer=url, header=None)
# Preview the first five rows.
dataset.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [37]:
# Count the samples per class; column 60 is the label column.
dataset.loc[:, 60].value_counts()

M    111
R     97
Name: 60, dtype: int64

In [38]:
# Separate the raw table into a feature matrix and a label vector.
array = dataset.values
data = array[:, :60].astype(float)  # columns 0-59: features, cast to float
targets = array[:, 60]              # column 60: class label

# Short aliases used by the cells below.
X, y = data, targets

# Split into a training and a test set: half of the samples each, with a
# fixed seed so the split is reproducible.
# NOTE(review): the later cells visible in this notebook fit on X/y and
# data/targets rather than on train_X/train_y - confirm whether this split
# is meant to be used.
from sklearn.model_selection import train_test_split

train_X, test_X, train_y, test_y = train_test_split(
    X, y, train_size=0.5, test_size=0.5, random_state=122
)
print("Labels for training and testing data")
print(train_y, test_y, sep="\n")

Labels for training and testing data
['R' 'R' 'R' 'M' 'M' 'M' 'R' 'R' 'M' 'M' 'R' 'R' 'R' 'R' 'M' 'R' 'R' 'M'
 'R' 'M' 'M' 'R' 'M' 'M' 'R' 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'M' 'M' 'R' 'R'
 'R' 'R' 'M' 'M' 'R' 'R' 'M' 'R' 'M' 'R' 'M' 'R' 'M' 'M' 'R' 'R' 'M' 'M'
 'M' 'M' 'R' 'M' 'M' 'M' 'M' 'R' 'R' 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'R' 'M'
 'M' 'R' 'R' 'M' 'R' 'R' 'R' 'M' 'R' 'M' 'R' 'M' 'R' 'R' 'M' 'R' 'R' 'M'
 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'M' 'R' 'R' 'R' 'M' 'M' 'M']
['M' 'R' 'M' 'M' 'M' 'M' 'M' 'R' 'M' 'R' 'M' 'M' 'R' 'R' 'R' 'M' 'R' 'R'
 'M' 'R' 'M' 'R' 'R' 'M' 'R' 'R' 'M' 'M' 'R' 'R' 'R' 'R' 'M' 'M' 'R' 'M'
 'M' 'M' 'M' 'R' 'R' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'R' 'R' 'R' 'R' 'R'
 'M' 'R' 'M' 'R' 'M' 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'M' 'R' 'R' 'R' 'M' 'M'
 'R' 'R' 'M' 'M' 'M' 'R' 'M' 'M' 'R' 'R' 'M' 'M' 'M' 'M' 'M' 'M' 'R' 'M'
 'R' 'M' 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'M' 'R' 'R' 'M' 'M']


#### Confusion Matrix
#### ROC-Curve
#### Save og Load model
#### Predict på "nye data"

# Catboost

# Bayesian Optimization using skopt

In [49]:
# Base CatBoost classifier that the Bayesian search clones and tunes.
clf = CatBoostClassifier(
    loss_function='Logloss',
    task_type="GPU",   # use task_type="CPU" to train on the CPU instead
    thread_count=2,
    od_type='Iter',    # overfitting-detector type (see CatBoost docs)
    verbose=True,      # prints one log line per boosting iteration
)

In [50]:
# Prepare Search Space for hyperparameter tuning

In [51]:
# CatBoost hyperparameter search space, passed to BayesSearchCV below.
# Each entry is a (low, high) range for one CatBoostClassifier parameter.
#
# Variants the author tried and left disabled:
#   'iterations': (10, 1000)
#   'learning_rate': (0.01, 1.0, 'log-uniform')
#   'random_strength': (1e-9, 10, 'log-uniform')
#   'scale_pos_weight': (0.01, 1.0, 'uniform')
search_spaces = dict(
    iterations=(10, 100),
    depth=(1, 8),
    learning_rate=(0.01, 1.0),
    random_strength=(1e-9, 10),
    bagging_temperature=(0.0, 1.0),
    border_count=(1, 255),
    l2_leaf_reg=(2, 30),
    scale_pos_weight=(0.01, 1.0),
)

In [52]:
# Functions for Bayes Search

In [53]:
# Reporting util for different optimizers
def report_perf(optimizer, X, y, title, callbacks=None):
    """
    Fit a search optimizer, print a timing/score summary, return the best params.

    optimizer = a sklearn or a skopt optimizer; it must expose fit(),
                cv_results_, best_score_, best_index_ and best_params_
    X = the training set
    y = our target
    title = a string label for the experiment (printed verbatim)
    callbacks = optional callbacks forwarded to fit() as ``callback=``;
                when falsy, fit() is called without that keyword

    Returns optimizer.best_params_.
    """
    import pprint

    start = time()
    if callbacks:
        optimizer.fit(X, y, callback=callbacks)
    else:
        optimizer.fit(X, y)
    # Stop the clock right after fitting so only the search itself is timed.
    elapsed = time() - start

    results = optimizer.cv_results_
    best_score = optimizer.best_score_
    # Standard deviation of the test score for the best candidate.
    best_score_std = results['std_test_score'][optimizer.best_index_]
    best_params = optimizer.best_params_

    # The title is interpolated as a value, never used as part of a
    # %-format string, so titles containing '%' are reported correctly.
    print(f"{title} took {elapsed:.2f} seconds,  "
          f"candidates checked: {len(results['params'])}, "
          f"best CV score: {best_score:.3f} \u00B1 {best_score_std:.3f}")
    print('Best parameters:')
    pprint.pprint(best_params)
    print()
    return best_params

# ROC AUC scorer and the cross-validation scheme used by the search below.
# get_scorer("roc_auc") returns scikit-learn's built-in ROC AUC scorer, the
# equivalent of make_scorer(roc_auc_score, greater_is_better=True,
# needs_threshold=True). The ``needs_threshold`` keyword was deprecated in
# scikit-learn 1.4 and later removed, so requesting the scorer by name keeps
# this cell working across versions.
from sklearn.metrics import get_scorer

roc_auc = get_scorer("roc_auc")
# Five stratified folds, shuffled with a fixed seed so results are repeatable.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

In [54]:
# Configure the Bayesian hyperparameter search around the CatBoost classifier.
opt = BayesSearchCV(
    estimator=clf,
    search_spaces=search_spaces,
    n_iter=10,
    optimizer_kwargs={'base_estimator': 'GP'},
    scoring=roc_auc,
    cv=skf,
    refit=True,
    return_train_score=False,
    n_jobs=1,  # keep a single job with CatBoost to avoid a segmentation fault
    random_state=42,
)

In [56]:
# Run the Bayesian search and print the timing/score report.
# NOTE(review): the search is fitted on the full X / y, not on
# train_X / train_y from the split cell - confirm this is intended.
best_params = report_perf(
    opt,
    X,
    y,
    'CatBoost',
    callbacks=[VerboseCallback(100), DeadlineStopper(60 * 10)],
)

# Run times noted by the author:
# CPU time: 188 sec
# GPU: 38 sec.

Iteration No: 1 started. Searching for the next optimal point.
0:	learn: 0.5873997	total: 19.3ms	remaining: 713ms
1:	learn: 0.5141872	total: 38.3ms	remaining: 690ms
2:	learn: 0.4500903	total: 58ms	remaining: 677ms
3:	learn: 0.4135970	total: 77.3ms	remaining: 657ms
4:	learn: 0.3744956	total: 95.4ms	remaining: 630ms
5:	learn: 0.3352537	total: 114ms	remaining: 606ms
6:	learn: 0.3073771	total: 133ms	remaining: 588ms
7:	learn: 0.2784326	total: 149ms	remaining: 559ms
8:	learn: 0.2615271	total: 170ms	remaining: 547ms
9:	learn: 0.2421525	total: 187ms	remaining: 523ms
10:	learn: 0.2265690	total: 204ms	remaining: 500ms
11:	learn: 0.2001559	total: 222ms	remaining: 481ms
12:	learn: 0.1887588	total: 241ms	remaining: 463ms
13:	learn: 0.1777471	total: 259ms	remaining: 443ms
14:	learn: 0.1692120	total: 276ms	remaining: 424ms
15:	learn: 0.1592049	total: 295ms	remaining: 406ms
16:	learn: 0.1508645	total: 314ms	remaining: 388ms
17:	learn: 0.1430796	total: 333ms	remaining: 370ms
18:	learn: 0.1350958	total

11:	learn: 0.2246175	total: 234ms	remaining: 508ms
12:	learn: 0.2025285	total: 254ms	remaining: 489ms
13:	learn: 0.1903955	total: 273ms	remaining: 468ms
14:	learn: 0.1802443	total: 292ms	remaining: 448ms
15:	learn: 0.1711167	total: 310ms	remaining: 426ms
16:	learn: 0.1629928	total: 329ms	remaining: 406ms
17:	learn: 0.1525024	total: 346ms	remaining: 384ms
18:	learn: 0.1435531	total: 365ms	remaining: 365ms
19:	learn: 0.1354510	total: 383ms	remaining: 345ms
20:	learn: 0.1294525	total: 401ms	remaining: 325ms
21:	learn: 0.1211016	total: 423ms	remaining: 308ms
22:	learn: 0.1159935	total: 443ms	remaining: 289ms
23:	learn: 0.1108533	total: 461ms	remaining: 269ms
24:	learn: 0.1067840	total: 479ms	remaining: 249ms
25:	learn: 0.1016628	total: 496ms	remaining: 229ms
26:	learn: 0.0976052	total: 512ms	remaining: 209ms
27:	learn: 0.0949707	total: 530ms	remaining: 189ms
28:	learn: 0.0917856	total: 547ms	remaining: 170ms
29:	learn: 0.0889856	total: 564ms	remaining: 150ms
30:	learn: 0.0838999	total: 579

56:	learn: 0.2206476	total: 362ms	remaining: 248ms
57:	learn: 0.2178925	total: 370ms	remaining: 242ms
58:	learn: 0.2175351	total: 378ms	remaining: 237ms
59:	learn: 0.2172039	total: 388ms	remaining: 233ms
60:	learn: 0.2143619	total: 394ms	remaining: 226ms
61:	learn: 0.2115825	total: 402ms	remaining: 221ms
62:	learn: 0.2112354	total: 408ms	remaining: 214ms
63:	learn: 0.2089991	total: 415ms	remaining: 207ms
64:	learn: 0.2073849	total: 422ms	remaining: 201ms
65:	learn: 0.2056748	total: 428ms	remaining: 195ms
66:	learn: 0.2055051	total: 434ms	remaining: 188ms
67:	learn: 0.2051974	total: 439ms	remaining: 181ms
68:	learn: 0.2049943	total: 445ms	remaining: 174ms
69:	learn: 0.2047850	total: 451ms	remaining: 167ms
70:	learn: 0.2045969	total: 457ms	remaining: 161ms
71:	learn: 0.2043868	total: 462ms	remaining: 154ms
72:	learn: 0.2023463	total: 468ms	remaining: 148ms
73:	learn: 0.1996716	total: 476ms	remaining: 141ms
74:	learn: 0.1993477	total: 482ms	remaining: 135ms
75:	learn: 0.1990799	total: 489

27:	learn: 0.2949680	total: 190ms	remaining: 460ms
28:	learn: 0.2909659	total: 195ms	remaining: 451ms
29:	learn: 0.2866336	total: 203ms	remaining: 447ms
30:	learn: 0.2843572	total: 210ms	remaining: 441ms
31:	learn: 0.2805966	total: 217ms	remaining: 434ms
32:	learn: 0.2751845	total: 223ms	remaining: 426ms
33:	learn: 0.2718052	total: 229ms	remaining: 418ms
34:	learn: 0.2679034	total: 235ms	remaining: 410ms
35:	learn: 0.2655924	total: 241ms	remaining: 402ms
36:	learn: 0.2623442	total: 247ms	remaining: 394ms
37:	learn: 0.2586455	total: 253ms	remaining: 386ms
38:	learn: 0.2550334	total: 259ms	remaining: 378ms
39:	learn: 0.2522392	total: 265ms	remaining: 371ms
40:	learn: 0.2493993	total: 271ms	remaining: 363ms
41:	learn: 0.2451885	total: 277ms	remaining: 356ms
42:	learn: 0.2430112	total: 283ms	remaining: 349ms
43:	learn: 0.2408230	total: 289ms	remaining: 342ms
44:	learn: 0.2374239	total: 295ms	remaining: 335ms
45:	learn: 0.2353451	total: 301ms	remaining: 328ms
46:	learn: 0.2332566	total: 307

Iteration No: 2 ended. Search finished for the next optimal point.
Time taken: 4.0810
Function value obtained: -0.9118
Current minimum: -0.9268
Iteration No: 3 started. Searching for the next optimal point.
0:	learn: 0.5067541	total: 7.92ms	remaining: 380ms
1:	learn: 0.4175649	total: 13.7ms	remaining: 323ms
2:	learn: 0.3757048	total: 19.8ms	remaining: 304ms
3:	learn: 0.3455594	total: 26.3ms	remaining: 296ms
4:	learn: 0.3073434	total: 32.5ms	remaining: 286ms
5:	learn: 0.2832918	total: 39.8ms	remaining: 286ms
6:	learn: 0.2448897	total: 45.6ms	remaining: 274ms
7:	learn: 0.2222019	total: 51.5ms	remaining: 264ms
8:	learn: 0.2128916	total: 57.2ms	remaining: 254ms
9:	learn: 0.2045410	total: 63.7ms	remaining: 248ms
10:	learn: 0.1925986	total: 70.6ms	remaining: 244ms
11:	learn: 0.1760498	total: 77.9ms	remaining: 240ms
12:	learn: 0.1600403	total: 84.5ms	remaining: 234ms
13:	learn: 0.1522427	total: 90ms	remaining: 225ms
14:	learn: 0.1493004	total: 95.4ms	remaining: 216ms
15:	learn: 0.1294827	tota

31:	learn: 0.0418765	total: 178ms	remaining: 94.7ms
32:	learn: 0.0406900	total: 183ms	remaining: 88.9ms
33:	learn: 0.0402925	total: 190ms	remaining: 83.7ms
34:	learn: 0.0385763	total: 196ms	remaining: 78.4ms
35:	learn: 0.0379354	total: 203ms	remaining: 73.4ms
36:	learn: 0.0351694	total: 210ms	remaining: 68.2ms
37:	learn: 0.0330073	total: 216ms	remaining: 62.4ms
38:	learn: 0.0301220	total: 222ms	remaining: 56.9ms
39:	learn: 0.0288665	total: 227ms	remaining: 51.1ms
40:	learn: 0.0282667	total: 232ms	remaining: 45.3ms
41:	learn: 0.0273710	total: 238ms	remaining: 39.7ms
42:	learn: 0.0253522	total: 244ms	remaining: 34ms
43:	learn: 0.0251230	total: 249ms	remaining: 28.3ms
44:	learn: 0.0247826	total: 255ms	remaining: 22.7ms
45:	learn: 0.0240693	total: 262ms	remaining: 17.1ms
46:	learn: 0.0229172	total: 267ms	remaining: 11.4ms
47:	learn: 0.0219592	total: 274ms	remaining: 5.72ms
48:	learn: 0.0209314	total: 280ms	remaining: 0us
0:	learn: 0.5407039	total: 6.65ms	remaining: 319ms
1:	learn: 0.469888

20:	learn: 0.3143432	total: 189ms	remaining: 550ms
21:	learn: 0.3048457	total: 199ms	remaining: 543ms
22:	learn: 0.2939241	total: 210ms	remaining: 538ms
23:	learn: 0.2875452	total: 231ms	remaining: 559ms
24:	learn: 0.2822726	total: 250ms	remaining: 571ms
25:	learn: 0.2721667	total: 269ms	remaining: 580ms
26:	learn: 0.2670136	total: 289ms	remaining: 589ms
27:	learn: 0.2593980	total: 304ms	remaining: 586ms
28:	learn: 0.2544390	total: 314ms	remaining: 573ms
29:	learn: 0.2490149	total: 323ms	remaining: 560ms
30:	learn: 0.2420742	total: 332ms	remaining: 546ms
31:	learn: 0.2323626	total: 342ms	remaining: 534ms
32:	learn: 0.2276795	total: 353ms	remaining: 524ms
33:	learn: 0.2244619	total: 361ms	remaining: 509ms
34:	learn: 0.2179872	total: 371ms	remaining: 498ms
35:	learn: 0.2146178	total: 381ms	remaining: 486ms
36:	learn: 0.2082560	total: 388ms	remaining: 472ms
37:	learn: 0.2041264	total: 396ms	remaining: 459ms
38:	learn: 0.2000355	total: 404ms	remaining: 446ms
39:	learn: 0.1960628	total: 412

20:	learn: 0.2984479	total: 186ms	remaining: 540ms
21:	learn: 0.2900494	total: 194ms	remaining: 528ms
22:	learn: 0.2817640	total: 203ms	remaining: 521ms
23:	learn: 0.2687858	total: 212ms	remaining: 513ms
24:	learn: 0.2634151	total: 219ms	remaining: 500ms
25:	learn: 0.2560148	total: 227ms	remaining: 488ms
26:	learn: 0.2488753	total: 234ms	remaining: 476ms
27:	learn: 0.2418856	total: 242ms	remaining: 466ms
28:	learn: 0.2391068	total: 249ms	remaining: 455ms
29:	learn: 0.2314152	total: 257ms	remaining: 445ms
30:	learn: 0.2225493	total: 265ms	remaining: 436ms
31:	learn: 0.2170732	total: 274ms	remaining: 428ms
32:	learn: 0.2140538	total: 282ms	remaining: 419ms
33:	learn: 0.2104362	total: 290ms	remaining: 410ms
34:	learn: 0.2042581	total: 299ms	remaining: 401ms
35:	learn: 0.1992686	total: 306ms	remaining: 391ms
36:	learn: 0.1916451	total: 315ms	remaining: 384ms
37:	learn: 0.1861428	total: 323ms	remaining: 374ms
38:	learn: 0.1819431	total: 331ms	remaining: 365ms
39:	learn: 0.1787127	total: 342

15:	learn: 0.0875860	total: 200ms	remaining: 726ms
16:	learn: 0.0846212	total: 211ms	remaining: 709ms
17:	learn: 0.0818235	total: 222ms	remaining: 692ms
18:	learn: 0.0779614	total: 231ms	remaining: 668ms
19:	learn: 0.0739588	total: 239ms	remaining: 644ms
20:	learn: 0.0707675	total: 247ms	remaining: 622ms
21:	learn: 0.0672145	total: 255ms	remaining: 603ms
22:	learn: 0.0634284	total: 263ms	remaining: 583ms
23:	learn: 0.0595404	total: 271ms	remaining: 564ms
24:	learn: 0.0566788	total: 279ms	remaining: 546ms
25:	learn: 0.0535327	total: 287ms	remaining: 530ms
26:	learn: 0.0508795	total: 295ms	remaining: 513ms
27:	learn: 0.0498175	total: 302ms	remaining: 497ms
28:	learn: 0.0473732	total: 310ms	remaining: 481ms
29:	learn: 0.0454775	total: 320ms	remaining: 469ms
30:	learn: 0.0437787	total: 329ms	remaining: 456ms
31:	learn: 0.0408061	total: 337ms	remaining: 442ms
32:	learn: 0.0388317	total: 345ms	remaining: 428ms
33:	learn: 0.0370668	total: 353ms	remaining: 415ms
34:	learn: 0.0353856	total: 360

41:	learn: 0.0267254	total: 370ms	remaining: 282ms
42:	learn: 0.0261088	total: 379ms	remaining: 273ms
43:	learn: 0.0259916	total: 388ms	remaining: 265ms
44:	learn: 0.0253310	total: 398ms	remaining: 256ms
45:	learn: 0.0246048	total: 405ms	remaining: 247ms
46:	learn: 0.0239975	total: 413ms	remaining: 237ms
47:	learn: 0.0235885	total: 421ms	remaining: 228ms
48:	learn: 0.0232183	total: 430ms	remaining: 219ms
49:	learn: 0.0229799	total: 440ms	remaining: 211ms
50:	learn: 0.0224050	total: 448ms	remaining: 202ms
51:	learn: 0.0220139	total: 456ms	remaining: 193ms
52:	learn: 0.0210466	total: 465ms	remaining: 184ms
53:	learn: 0.0206289	total: 475ms	remaining: 176ms
54:	learn: 0.0202475	total: 483ms	remaining: 167ms
55:	learn: 0.0197899	total: 490ms	remaining: 158ms
56:	learn: 0.0193853	total: 497ms	remaining: 148ms
57:	learn: 0.0192482	total: 505ms	remaining: 139ms
58:	learn: 0.0188602	total: 513ms	remaining: 130ms
59:	learn: 0.0184869	total: 521ms	remaining: 122ms
60:	learn: 0.0180125	total: 528

62:	learn: 0.0176563	total: 557ms	remaining: 97.2ms
63:	learn: 0.0173850	total: 568ms	remaining: 88.8ms
64:	learn: 0.0170996	total: 578ms	remaining: 80ms
65:	learn: 0.0167648	total: 590ms	remaining: 71.5ms
66:	learn: 0.0165151	total: 599ms	remaining: 62.6ms
67:	learn: 0.0163510	total: 608ms	remaining: 53.7ms
68:	learn: 0.0160953	total: 618ms	remaining: 44.8ms
69:	learn: 0.0160482	total: 625ms	remaining: 35.7ms
70:	learn: 0.0158306	total: 633ms	remaining: 26.8ms
71:	learn: 0.0157770	total: 640ms	remaining: 17.8ms
72:	learn: 0.0155104	total: 648ms	remaining: 8.87ms
73:	learn: 0.0153888	total: 656ms	remaining: 0us
Iteration No: 5 ended. Search finished for the next optimal point.
Time taken: 4.2523
Function value obtained: -0.9052
Current minimum: -0.9379
Iteration No: 6 started. Searching for the next optimal point.
0:	learn: 0.5326376	total: 6.34ms	remaining: 165ms
1:	learn: 0.4684025	total: 11.4ms	remaining: 142ms
2:	learn: 0.4170729	total: 16.8ms	remaining: 134ms
3:	learn: 0.4062195	t

25:	learn: 0.0608910	total: 199ms	remaining: 481ms
26:	learn: 0.0602111	total: 210ms	remaining: 482ms
27:	learn: 0.0580111	total: 221ms	remaining: 482ms
28:	learn: 0.0555790	total: 230ms	remaining: 475ms
29:	learn: 0.0516135	total: 238ms	remaining: 469ms
30:	learn: 0.0514605	total: 251ms	remaining: 470ms
31:	learn: 0.0461079	total: 261ms	remaining: 464ms
32:	learn: 0.0455746	total: 267ms	remaining: 453ms
33:	learn: 0.0455193	total: 274ms	remaining: 443ms
34:	learn: 0.0453032	total: 280ms	remaining: 432ms
35:	learn: 0.0434121	total: 287ms	remaining: 422ms
36:	learn: 0.0407882	total: 293ms	remaining: 412ms
37:	learn: 0.0390171	total: 300ms	remaining: 402ms
38:	learn: 0.0385197	total: 306ms	remaining: 393ms
39:	learn: 0.0370541	total: 313ms	remaining: 383ms
40:	learn: 0.0367683	total: 320ms	remaining: 375ms
41:	learn: 0.0359683	total: 327ms	remaining: 366ms
42:	learn: 0.0358106	total: 334ms	remaining: 357ms
43:	learn: 0.0351937	total: 341ms	remaining: 349ms
44:	learn: 0.0338444	total: 348

24:	learn: 0.0828787	total: 195ms	remaining: 500ms
25:	learn: 0.0760346	total: 205ms	remaining: 497ms
26:	learn: 0.0715483	total: 216ms	remaining: 496ms
27:	learn: 0.0676726	total: 223ms	remaining: 487ms
28:	learn: 0.0670577	total: 231ms	remaining: 478ms
29:	learn: 0.0631361	total: 239ms	remaining: 469ms
30:	learn: 0.0627082	total: 246ms	remaining: 460ms
31:	learn: 0.0622750	total: 253ms	remaining: 450ms
32:	learn: 0.0592192	total: 260ms	remaining: 441ms
33:	learn: 0.0560338	total: 266ms	remaining: 431ms
34:	learn: 0.0551041	total: 273ms	remaining: 421ms
35:	learn: 0.0544430	total: 280ms	remaining: 412ms
36:	learn: 0.0511226	total: 287ms	remaining: 403ms
37:	learn: 0.0485702	total: 294ms	remaining: 394ms
38:	learn: 0.0463165	total: 300ms	remaining: 385ms
39:	learn: 0.0458231	total: 307ms	remaining: 376ms
40:	learn: 0.0434644	total: 314ms	remaining: 368ms
41:	learn: 0.0432544	total: 320ms	remaining: 359ms
42:	learn: 0.0430643	total: 327ms	remaining: 350ms
43:	learn: 0.0429085	total: 335

24:	learn: 0.0701143	total: 186ms	remaining: 476ms
25:	learn: 0.0668253	total: 194ms	remaining: 470ms
26:	learn: 0.0634987	total: 203ms	remaining: 466ms
27:	learn: 0.0618979	total: 210ms	remaining: 458ms
28:	learn: 0.0581961	total: 217ms	remaining: 449ms
29:	learn: 0.0539064	total: 225ms	remaining: 443ms
30:	learn: 0.0511557	total: 232ms	remaining: 434ms
31:	learn: 0.0506542	total: 239ms	remaining: 427ms
32:	learn: 0.0504482	total: 246ms	remaining: 417ms
33:	learn: 0.0483630	total: 253ms	remaining: 409ms
34:	learn: 0.0478922	total: 260ms	remaining: 401ms
35:	learn: 0.0454100	total: 266ms	remaining: 392ms
36:	learn: 0.0424830	total: 273ms	remaining: 384ms
37:	learn: 0.0401678	total: 280ms	remaining: 376ms
38:	learn: 0.0390044	total: 287ms	remaining: 368ms
39:	learn: 0.0387112	total: 294ms	remaining: 361ms
40:	learn: 0.0385275	total: 302ms	remaining: 354ms
41:	learn: 0.0371515	total: 310ms	remaining: 346ms
42:	learn: 0.0370343	total: 317ms	remaining: 339ms
43:	learn: 0.0361265	total: 325

29:	learn: 0.1358562	total: 217ms	remaining: 397ms
30:	learn: 0.1332356	total: 224ms	remaining: 391ms
31:	learn: 0.1309291	total: 234ms	remaining: 388ms
32:	learn: 0.1292215	total: 241ms	remaining: 380ms
33:	learn: 0.1275235	total: 248ms	remaining: 372ms
34:	learn: 0.1259826	total: 255ms	remaining: 364ms
35:	learn: 0.1237557	total: 261ms	remaining: 356ms
36:	learn: 0.1227359	total: 269ms	remaining: 349ms
37:	learn: 0.1211544	total: 276ms	remaining: 341ms
38:	learn: 0.1195535	total: 282ms	remaining: 333ms
39:	learn: 0.1180944	total: 289ms	remaining: 325ms
40:	learn: 0.1171127	total: 296ms	remaining: 318ms
41:	learn: 0.1160891	total: 303ms	remaining: 310ms
42:	learn: 0.1152126	total: 310ms	remaining: 302ms
43:	learn: 0.1141339	total: 318ms	remaining: 296ms
44:	learn: 0.1135704	total: 324ms	remaining: 288ms
45:	learn: 0.1126265	total: 333ms	remaining: 282ms
46:	learn: 0.1117964	total: 341ms	remaining: 276ms
47:	learn: 0.1105482	total: 349ms	remaining: 269ms
48:	learn: 0.1094170	total: 355

26:	learn: 0.1445520	total: 194ms	remaining: 417ms
27:	learn: 0.1420999	total: 201ms	remaining: 410ms
28:	learn: 0.1394806	total: 209ms	remaining: 403ms
29:	learn: 0.1367684	total: 218ms	remaining: 401ms
30:	learn: 0.1353358	total: 225ms	remaining: 393ms
31:	learn: 0.1336597	total: 232ms	remaining: 385ms
32:	learn: 0.1315691	total: 239ms	remaining: 376ms
33:	learn: 0.1302081	total: 246ms	remaining: 369ms
34:	learn: 0.1283223	total: 253ms	remaining: 361ms
35:	learn: 0.1260949	total: 259ms	remaining: 353ms
36:	learn: 0.1247072	total: 266ms	remaining: 345ms
37:	learn: 0.1236098	total: 273ms	remaining: 337ms
38:	learn: 0.1217691	total: 279ms	remaining: 330ms
39:	learn: 0.1203845	total: 286ms	remaining: 322ms
40:	learn: 0.1195944	total: 293ms	remaining: 315ms
41:	learn: 0.1187852	total: 300ms	remaining: 307ms
42:	learn: 0.1180942	total: 306ms	remaining: 299ms
43:	learn: 0.1167053	total: 313ms	remaining: 292ms
44:	learn: 0.1160122	total: 320ms	remaining: 284ms
45:	learn: 0.1153457	total: 327

14:	learn: 0.1649286	total: 202ms	remaining: 430ms
15:	learn: 0.1574814	total: 217ms	remaining: 421ms
16:	learn: 0.1484598	total: 232ms	remaining: 410ms
17:	learn: 0.1418533	total: 245ms	remaining: 395ms
18:	learn: 0.1337991	total: 257ms	remaining: 379ms
19:	learn: 0.1254085	total: 269ms	remaining: 364ms
20:	learn: 0.1209518	total: 282ms	remaining: 349ms
21:	learn: 0.1171170	total: 296ms	remaining: 337ms
22:	learn: 0.1125970	total: 309ms	remaining: 323ms
23:	learn: 0.1089597	total: 322ms	remaining: 308ms
24:	learn: 0.1047845	total: 334ms	remaining: 294ms
25:	learn: 0.1017118	total: 347ms	remaining: 280ms
26:	learn: 0.0972526	total: 359ms	remaining: 266ms
27:	learn: 0.0915086	total: 371ms	remaining: 251ms
28:	learn: 0.0879257	total: 382ms	remaining: 237ms
29:	learn: 0.0838670	total: 395ms	remaining: 224ms
30:	learn: 0.0820270	total: 411ms	remaining: 212ms
31:	learn: 0.0801408	total: 426ms	remaining: 200ms
32:	learn: 0.0775789	total: 441ms	remaining: 187ms
33:	learn: 0.0757915	total: 454

45:	learn: 0.0608295	total: 598ms	remaining: 13ms
46:	learn: 0.0594774	total: 613ms	remaining: 0us
0:	learn: 0.5906590	total: 15.2ms	remaining: 701ms
1:	learn: 0.5141531	total: 31.2ms	remaining: 703ms
2:	learn: 0.4488344	total: 47.4ms	remaining: 695ms
3:	learn: 0.4016373	total: 60.5ms	remaining: 650ms
4:	learn: 0.3618361	total: 73.2ms	remaining: 615ms
5:	learn: 0.3286771	total: 86.6ms	remaining: 592ms
6:	learn: 0.2993858	total: 99.7ms	remaining: 570ms
7:	learn: 0.2714804	total: 113ms	remaining: 549ms
8:	learn: 0.2509070	total: 125ms	remaining: 530ms
9:	learn: 0.2319025	total: 138ms	remaining: 512ms
10:	learn: 0.2127001	total: 151ms	remaining: 495ms
11:	learn: 0.1959111	total: 165ms	remaining: 481ms
12:	learn: 0.1839707	total: 178ms	remaining: 466ms
13:	learn: 0.1765742	total: 191ms	remaining: 451ms
14:	learn: 0.1696104	total: 206ms	remaining: 439ms
15:	learn: 0.1637125	total: 220ms	remaining: 427ms
16:	learn: 0.1574041	total: 233ms	remaining: 411ms
17:	learn: 0.1494755	total: 245ms	rem

18:	learn: 0.1494019	total: 200ms	remaining: 147ms
19:	learn: 0.1424523	total: 213ms	remaining: 138ms
20:	learn: 0.1388082	total: 226ms	remaining: 129ms
21:	learn: 0.1351689	total: 237ms	remaining: 118ms
22:	learn: 0.1313706	total: 249ms	remaining: 108ms
23:	learn: 0.1284497	total: 260ms	remaining: 97.6ms
24:	learn: 0.1246675	total: 271ms	remaining: 86.7ms
25:	learn: 0.1214408	total: 281ms	remaining: 75.7ms
26:	learn: 0.1183490	total: 291ms	remaining: 64.7ms
27:	learn: 0.1139598	total: 301ms	remaining: 53.7ms
28:	learn: 0.1112039	total: 311ms	remaining: 42.9ms
29:	learn: 0.1091458	total: 321ms	remaining: 32.1ms
30:	learn: 0.1054069	total: 331ms	remaining: 21.4ms
31:	learn: 0.1029952	total: 341ms	remaining: 10.7ms
32:	learn: 0.0997357	total: 350ms	remaining: 0us
0:	learn: 0.5299978	total: 11.1ms	remaining: 356ms
1:	learn: 0.4749444	total: 22.9ms	remaining: 355ms
2:	learn: 0.4203363	total: 33.3ms	remaining: 333ms
3:	learn: 0.3630826	total: 43.8ms	remaining: 317ms
4:	learn: 0.3302133	tota

In [58]:
# Convert the ordered dictionary of best parameters to a plain dictionary.
# dict() makes a shallow copy with the same keys, values and order. A JSON
# round trip (json.loads(json.dumps(...))) is deliberately avoided: it raises
# TypeError for values json cannot encode, e.g. NumPy scalars.
import json  # not needed by this cell any more; kept in case other cells use it

best_params_bayesian = dict(best_params)
print(best_params_bayesian)

{'bagging_temperature': 0.8123959883573634, 'border_count': 45, 'depth': 5, 'iterations': 82, 'l2_leaf_reg': 17, 'learning_rate': 0.10450048882284352, 'random_strength': 7.558005328603015, 'scale_pos_weight': 0.8739040497768427}


In [59]:
%%time
# Build the final classifier from the tuned hyperparameters and train it on CPU.
# NOTE(review): the model is fitted on the complete data / targets arrays,
# not on train_X / train_y - confirm this is intended.
tuned_model = CatBoostClassifier(
    task_type="CPU",
    od_type='Iter',
    one_hot_max_size=10,
    **best_params_bayesian,  # alternatively: **best_params
)
tuned_model.fit(X=data, y=targets)

0:	learn: 0.6681680	total: 2.09ms	remaining: 169ms
1:	learn: 0.6539092	total: 3.73ms	remaining: 149ms
2:	learn: 0.6339582	total: 5.22ms	remaining: 138ms
3:	learn: 0.6193554	total: 6.71ms	remaining: 131ms
4:	learn: 0.6038448	total: 8.21ms	remaining: 126ms
5:	learn: 0.5911654	total: 9.91ms	remaining: 126ms
6:	learn: 0.5648462	total: 13.5ms	remaining: 145ms
7:	learn: 0.5417408	total: 16.6ms	remaining: 153ms
8:	learn: 0.5255333	total: 20.6ms	remaining: 167ms
9:	learn: 0.5118253	total: 31.6ms	remaining: 227ms
10:	learn: 0.5078489	total: 33.1ms	remaining: 214ms
11:	learn: 0.5009593	total: 34.7ms	remaining: 202ms
12:	learn: 0.4902377	total: 36.1ms	remaining: 192ms
13:	learn: 0.4863256	total: 39.6ms	remaining: 192ms
14:	learn: 0.4818171	total: 46.3ms	remaining: 207ms
15:	learn: 0.4729135	total: 48.3ms	remaining: 199ms
16:	learn: 0.4672864	total: 51.7ms	remaining: 198ms
17:	learn: 0.4604640	total: 53.2ms	remaining: 189ms
18:	learn: 0.4519976	total: 54.7ms	remaining: 181ms
19:	learn: 0.4456523	t

In [60]:
# Predict with the tuned model in three forms: class labels, raw formula
# values and class probabilities.
# NOTE(review): predictions are made on `data`, the same array the model
# was fitted on in the previous cell.
preds_class = tuned_model.predict(data, prediction_type='Class')
preds_raw_vals = tuned_model.predict(data, prediction_type='RawFormulaVal')
preds_proba = tuned_model.predict(data, prediction_type='Probability')

# Alternatively: preds_proba = tuned_model.predict_proba(data)
print("class = ", preds_class)
print("proba = ", preds_proba)
# Label fixed: these are the raw formula values, not probabilities.
print("raw = ", preds_raw_vals)

class =  ['R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M']
proba =  [[0.31677529 0.68322471]
 [0.31583473 0.68416527]
 [0.27349053 0.72650947]
 [0.2623095  0.7376905 ]
 [0.21032579 0.78967421]
 [0.14147129

In [62]:
# Confusion matrix of the true labels against the predicted classes.
# NOTE(review): preds_class was computed on the same data the model was
# trained on, so this reflects training-set performance, not held-out
# performance - confirm before reporting it.
from sklearn.metrics import confusion_matrix

confusion_matrix(y, preds_class)


array([[111,   0],
       [  0,  97]], dtype=int64)

In [58]:
# RoC Curve
