# Optimization Examples

In [1]:
# Change the working directory to the project root (the parent of the current
# directory) so that relative paths such as "data/dummy_data.csv" resolve.
# NOTE: not idempotent -- re-running this cell moves up one more level.
%cd ..

/home/amirhessam/Documents/GitHub/slick-ml


In [2]:
%load_ext autoreload

# widen the screen
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# change the path and loading class
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns

In [3]:
# Trigger a reload of already-imported modules (the autoreload extension is
# loaded in the setup cell), then import the optimizer class demonstrated below.
%autoreload
from slickml.optimization import XGBoostClassifierBayesianOpt

# XGBoostClassifierBayesianOpt

## Example 1

In [4]:
# Read the dummy dataset (path is relative to the working directory set in the
# first cell) and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="data/dummy_data.csv")
df.head(n=5)

Unnamed: 0,F1,F2,F3,F4,F5,F6,CLASS
0,5.7,4.7,3.7,58.0,249.3,456.47,0
1,7.7,6.6,4.1,20.0,260.92,443.43,1
2,6.2,4.3,4.6,59.0,255.63,478.96,1
3,5.7,4.4,3.8,49.0,195.28,381.94,0
4,9.1,4.7,4.6,17.0,259.55,395.67,1


In [5]:
# Target vector: the CLASS column as a numpy array.
y = df["CLASS"].values
# Feature frame: every column except CLASS.
X = df.drop(columns=["CLASS"])

In [6]:
from sklearn.model_selection import train_test_split

# Shuffled 80/20 hold-out split, stratified on the target, with a fixed seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=1367,
)

In [7]:
# Instantiate the Bayesian optimizer; no arguments are passed, so the class's
# own defaults are used.
xbo = XGBoostClassifierBayesianOpt()

In [8]:
# Run the optimizer on the training split only; a progress table with one row
# per iteration (target and sampled hyper-parameters) is printed below.
xbo.fit(X_train, y_train)

|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.8245  [0m | [0m 0.8975  [0m | [0m 0.04571 [0m | [0m 0.6628  [0m | [0m 4.238   [0m | [0m 1.436   [0m | [0m 0.3064  [0m | [0m 0.7136  [0m | [0m 0.1931  [0m |
| [95m 2       [0m | [95m 0.8374  [0m | [95m 0.7904  [0m | [95m 0.6447  [0m | [95m 0.9152  [0m | [95m 3.334   [0m | [95m 3.238   [0m | [95m 0.7772  [0m | [95m 0.269   [0m | [95m 0.9726  [0m |
| [0m 3       [0m | [0m 0.8199  [0m | [0m 0.8498  [0m | [0m 0.6044  [0m | [0m 0.6874  [0m | [0m 6.651   [0m | [0m 15.7    [0m | [0m 0.061   [0m | [0m 0.5114  [0m | [0m 0.6848  [0m |
| [95m 4       [0m | [95m 0.8551  [0m | [95m 0.7297  [0m | [95m 0.8513  [0m | [95m 0.4627  [0m | [95m 4.757   [0m | [95m 4.965 

In [9]:
# Optimization results (also available as xbo.optimization_results_):
# one row per iteration with the sampled hyper-parameters and the resulting auc.
xbo.get_optimization_results()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample,auc
0,0.89753,0.045712,0.662807,4.238468,1.43566,0.306424,0.713585,0.193055,0.824512
1,0.790404,0.644709,0.91519,3.334492,3.23828,0.777161,0.26901,0.972576,0.837449
2,0.849819,0.60437,0.687435,6.651023,15.698338,0.061001,0.511379,0.684811,0.819881
3,0.729727,0.851274,0.462704,4.756996,4.964748,0.932765,0.362983,0.936539,0.855133
4,0.542456,0.545092,0.878165,6.632704,5.028311,0.184497,0.333049,0.912511,0.81725
5,0.433647,0.281979,0.30171,4.287809,15.997975,0.061965,0.823726,0.892268,0.809917
6,0.527662,0.710147,0.316749,2.901123,9.435776,0.691293,0.125812,0.646834,0.838585
7,0.129065,0.224998,0.896721,6.215583,3.885759,0.509329,0.256423,0.391094,0.807489
8,0.784692,0.884516,0.463588,4.454321,4.869008,0.969036,0.362743,0.979997,0.854646
9,0.821392,1.0,0.231482,4.570813,5.632603,1.0,0.394688,1.0,0.864417


In [10]:
# Best performance (also available as xbo.best_performance_):
# in this run, the results row with the highest auc.
xbo.get_best_performance()

Unnamed: 0,colsample_bytree,gamma,learning_rate,max_depth,min_child_weight,reg_alpha,reg_lambda,subsample,auc
0,0.821392,1.0,0.231482,4.570813,5.632603,1.0,0.394688,1.0,0.864417


In [11]:
# Tuned hyper-parameters as a dict (also available as xbo.best_params_).
# Note that max_depth comes back as an integer here (4), whereas the results
# table shows the raw sampled value (4.570813).
xbo.get_best_params()

{'colsample_bytree': 0.8213916662259918,
 'gamma': 1.0,
 'learning_rate': 0.23148232373451072,
 'max_depth': 4,
 'min_child_weight': 5.632602921054691,
 'reg_alpha': 1.0,
 'reg_lambda': 0.39468801734425263,
 'subsample': 1.0}

In [12]:
# Underlying optimizer object (also available as xbo.optimizer_);
# the output shows it is a bayes_opt BayesianOptimization instance.
xbo.get_optimizer()

<bayes_opt.bayesian_optimization.BayesianOptimization at 0x7f1c443f2810>

In [13]:
# Search-space boundaries: a dict mapping each hyper-parameter name to a
# two-element tuple (presumably lower and upper bound -- confirm in the class docs).
xbo.get_pbounds()

{'max_depth': (2, 7),
 'learning_rate': (0, 1),
 'min_child_weight': (1, 20),
 'colsample_bytree': (0.1, 1.0),
 'subsample': (0.1, 1),
 'gamma': (0, 1),
 'reg_alpha': (0, 1),
 'reg_lambda': (0, 1)}

In [14]:
# Same boundaries read directly from the attribute; the output matches
# xbo.get_pbounds() above.
xbo.pbounds

{'max_depth': (2, 7),
 'learning_rate': (0, 1),
 'min_child_weight': (1, 20),
 'colsample_bytree': (0.1, 1.0),
 'subsample': (0.1, 1),
 'gamma': (0, 1),
 'reg_alpha': (0, 1),
 'reg_lambda': (0, 1)}

In [15]:
# from bayes_opt import BayesianOptimization
# import xgboost as xgb
# import numpy as np
# import pandas as pd 
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import scale
# import warnings
# warnings.simplefilter("ignore")
# from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:95% !important; }</style>"))
# import seaborn as sns
# sns.set_style("ticks")
# %matplotlib inline

# def _my_bayesian_optimization(X, Y, n_iter = 5,
#                                        init_points = 5,
#                                        acq = "ei",
#                                        num_boost_round = 1000,
#                                        nfold = 10,
#                                        stratified = True,
#                                        metrics = ("auc"),
#                                        early_stopping_rounds = 20,
#                                        seed = 1367,
#                                        shuffle = True,
#                                        show_stdv = False,
#                                        pbounds = None,
#                                        importance_type = "total_gain",
#                                        callbacks = False,
#                                        verbose_eval = False):
#     """
#     a function to run bayesian optimization for xgboost
#     input parameters:
#                     X: features (pandas dataframe or numpy array)
#                     Y: targets (1D array or list)
#                     n_iter: total number of bayesian iterations (default = 5)
#                     init_points: total initial points of optimization (default = 5)
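#                     acq: acquisition function passed to the optimizer's maximize() (default = "ei")
#                     num_boost_round: max number of boosting rounds, (default = 1000)
#                     nfold: number of cross-validation folds (default = 10)
#                     stratified: stratification of the targets (default = True)
#                     metrics: classification/regression metrics (default = ("auc"))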
#                     early_stopping_rounds: the criteria for stopping if the test metric is not improved (default = 20)
#                     seed: random seed (default = 1367)
#                     shuffle: shuffling the data (default = True)
#                     show_stdv = showing standard deviation of cv results (default = False)
#                     pbounds = set of parameters for bayesian optimization of xgboost cv
#                             (default_params = {
#                                                "eval_metric" : "auc",
#                                                "tree_method": "hist",
#                                                "objective" : "binary:logistic",
#                                                "learning_rate" : 0.05,
#                                                "max_depth": 2,
#                                                "min_child_weight": 1,
#                                                "gamma" : 0.0,
#                                                "reg_alpha" : 0.0,
#                                                "reg_lambda" : 1.0,
#                                                "subsample" : 0.9,
#                                                "max_delta_step": 1,
#                                                "silent" : 1,
#                                                "nthread" : 4,
#                                                "scale_pos_weight" : 1
#                                                }
#                             )
#                     importance_type = importance type of xgboost as string (default = "total_gain")
#                                       the other options will be "weight", "gain", "cover", and "total_cover"
#                     callbacks = printing callbacks for xgboost cv
#                                 (defaults = False, if True: [xgb.callback.print_evaluation(show_stdv = show_stdv),
#                                                              xgb.callback.early_stop(early_stopping_rounds)])
#                     verbose_eval : a flag to show the result during train on train/test sets (default = False)
#     outputs:
#             res_df: dataframe of all optimization results (one row per iteration: the parameters and the metric value)
#             xgb_params: a dictionary of the best parameters of xgboost                
    
    
#     """

    
#     # callback flag
#     if(callbacks == True):
#         callbacks = [xgb.callback.print_evaluation(show_stdv = show_stdv),
#                      xgb.callback.early_stop(early_stopping_rounds)]
#     else:
#         callbacks = None    
    
#     # pbounds
#     default_pbounds = {"max_depth" : (2, 5),
#                        "learning_rate" : (0, 1), 
#                        "min_child_weight" : (1, 20),
#                        "subsample" : (0.1, 1),
#                        "gamma": (0, 1),
#                        "colsample_bytree": (0.1, 1.0)
#                       }
    
#     # updating the default parameters of the pbounds
#     if pbounds is not None:
#         for key, val in pbounds.items():
#             default_pbounds[key] = val
    
    
#     def __xgb_eval(learning_rate,
#                    max_depth,
#                    gamma,
#                    colsample_bytree,
#                    min_child_weight,
#                    subsample):

#         params = {"eval_metric" : "auc",
#                   "tree_method": "hist",
#                   "objective" : "binary:logistic",
#                   "max_delta_step": 1,
#                   "silent" : 1,
#                   "nthread" : 4,
#                   "scale_pos_weight" : 1,
#                   "reg_alpha" : 0.0,
#                   "reg_lambda" : 1.0,
#                   "learning_rate" : learning_rate,
#                   "max_depth": int(max_depth),
#                   "min_child_weight": min_child_weight,
#                   "gamma" : gamma,
#                   "subsample" : subsample,
#                   "colsample_bytree" : colsample_bytree 
#                  }
#         dtrain = xgb.DMatrix(data = X, label = Y)
#         cv_result = xgb.cv(params = params,
#                            dtrain = dtrain,
#                            num_boost_round = num_boost_round,
#                            nfold = nfold,
#                            stratified = stratified,
#                            metrics = metrics,
#                            early_stopping_rounds = early_stopping_rounds,
#                            seed = seed,
#                            verbose_eval = verbose_eval,
#                            shuffle = shuffle,
#                            callbacks = callbacks)

#         return cv_result.iloc[-1][2]
    

#     xgb_bo = bo(__xgb_eval, default_pbounds, random_state = seed, verbose = 3)
#     xgb_bo.maximize(init_points = init_points, n_iter = n_iter, acq = acq)
    
    
#     targets = []
#     for i, rs in enumerate(xgb_bo.res):
#         targets.append(rs["target"])
#     best_params = xgb_bo.res[targets.index(max(targets))]["params"]
#     best_params["max_depth"] = int(best_params["max_depth"])
    
#     xgb_params = {"eval_metric" : "auc",
#                   "tree_method": "hist",
#                   "objective" : "binary:logistic",
#                   "max_delta_step": 1,
#                   "silent" : 1,
#                   "nthread" : 4,
#                   "scale_pos_weight" : 1,
#                   "reg_alpha" : 0.0,
#                   "reg_lambda" : 1.0,
#                   "learning_rate" : 0.05,
#                   "max_depth": 2,
#                   "min_child_weight": 5,
#                   "gamma" : 0.0,
#                   "subsample" : 1.0,
#                   "colsample_bytree" : 0.9 
#                  }
#     for key, val in best_params.items():
#         xgb_params[key] = val
    
#     dtrain = xgb.DMatrix(data = X, label = Y)
#     bst = xgb.train(params = xgb_params,
#                     dtrain = dtrain,
#                     num_boost_round = num_boost_round)
    
#     # build results dataframe
#     frames = []
#     for idx, res in enumerate(xgb_bo.res):
#         d = res['params']
#         d[metrics] = res["target"]
#         frames.append(pd.DataFrame(data = d, index = [idx]))
    
#     res_df = pd.concat(frames)
   
#     print(F"-*-*-*-*-*-* Optimization Results -*-*-*-*-*-*")
#     display(res_df)
    
#     # Plotting
#     import matplotlib as mpl

#     mpl.rcParams['axes.linewidth'] = 3 
#     mpl.rcParams['lines.linewidth'] = 3
#     cols = [col for col in res_df.columns.tolist() if col != "auc"]
#     ip = 1
#     plt.figure(figsize = (22, 10))
#     colors = ["navy", "lavender", "lightblue", "cyan", "cadetblue", "slateblue"]
#     for col in cols:
#         res_df.sort_values(by = col, inplace=True)
#         plt.subplot(2,3,ip)
#         plt.plot(res_df.loc[:, col], res_df.loc[:, metrics], color = colors[ip-1])
#         plt.xlabel(F"{col}", fontsize = 20)
#         plt.ylabel(F"{metrics}", fontsize = 20)
#         plt.tick_params(axis='both', which='major', labelsize = 12)
#         ip += 1
#     plt.show()
    
#     print(F"-*-*-*-*-*-* Best Performance -*-*-*-*-*-*")
#     display(res_df.loc[res_df[metrics] == res_df[metrics].max(), :])
    
#     from xgboost import plot_importance
#     from pylab import rcParams
#     rcParams['figure.figsize'] = (10,10)
#     plot_importance(bst, importance_type = importance_type, color = "skyblue", xlabel = importance_type)
#     plt.show()   
    
#     return res_df, xgb_params