In [32]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score 
from sklearn import datasets
from sklearn import linear_model
from bayes_opt import BayesianOptimization
import xgboost as xgb

In [33]:
breast_cancer = datasets.load_breast_cancer()

In [34]:
lr = linear_model.LogisticRegression(C=1)
lr.fit(breast_cancer.data, breast_cancer.target)
roc_auc_score(breast_cancer.target,lr.predict_proba(breast_cancer.data)[:,1])

0.9948073569050261

In [35]:
cross_val_score(lr,breast_cancer.data, breast_cancer.target, scoring = 'roc_auc', cv = 5).mean()

0.9925944556226247

In [36]:
#sklearn使用
#定义目标函数
def lrcv(c):
    return cross_val_score(linear_model.LogisticRegression(C=int(c)),breast_cancer.data, breast_cancer.target, scoring = 'roc_auc', cv = 5).mean()
#设置bayesopt
bo = BayesianOptimization(lrcv,
                          {'c': (1,5)})
#高斯过程参数
gp_params = {'kernel': None,
             'alpha': 1e-5}
#优化
bo.maximize(n_iter=10, acq='ei', **gp_params)
#结果,最大值，最大值参数
bo.res,bo.res['max'],bo.res['max']['max_params']

[31mInitialization[0m
[94m-----------------------------------------[0m
 Step |   Time |      Value |         c | 
    1 | 00m00s | [35m   0.99240[0m | [32m   2.6128[0m | 
    2 | 00m00s |    0.99234 |    4.5256 | 
    3 | 00m00s | [35m   0.99259[0m | [32m   1.9899[0m | 
    4 | 00m00s |    0.99259 |    1.7732 | 
    5 | 00m00s |    0.99227 |    3.3269 | 
[31mBayesian Optimization[0m
[94m-----------------------------------------[0m
 Step |   Time |      Value |         c | 
    6 | 00m01s |    0.99214 |    5.0000 | 
    7 | 00m01s |    0.99259 |    1.0000 | 
    8 | 00m03s |    0.99227 |    3.9537 | 
    9 | 00m05s |    0.99259 |    1.2582 | 
   10 | 00m05s |    0.99234 |    4.8116 | 
   11 | 00m04s |    0.99240 |    2.2407 | 
   12 | 00m04s |    0.99240 |    2.9399 | 
   13 | 00m04s |    0.99259 |    1.1825 | 
   14 | 00m04s |    0.99227 |    3.6310 | 
   15 | 00m04s |    0.99234 |    4.2379 | 


({'max': {'max_val': 0.9925944556226247,
   'max_params': {'c': 1.9898655750404752}},
  'all': {'values': [0.9921397896422463,
    0.9925944556226247,
    0.9922665190107051,
    0.9925944556226247,
    0.9923385272877576,
    0.9923957179771132,
    0.9923957179771132,
    0.9925944556226247,
    0.9922665190107051,
    0.9923385272877576],
   'params': [{'c': 5.0},
    {'c': 1.0},
    {'c': 3.9536902709921677},
    {'c': 1.2581829998752758},
    {'c': 4.81156450822061},
    {'c': 2.240689289526753},
    {'c': 2.9398908525513656},
    {'c': 1.1825325393308384},
    {'c': 3.631007465102566},
    {'c': 4.237855509722161}]}},
 {'max_val': 0.9925944556226247, 'max_params': {'c': 1.9898655750404752}},
 {'c': 1.9898655750404752})

In [30]:
#xgboost
#对于int型参数要加上int函数强制转化为整数,可以查看xgboost文档以确定哪些参数为int型
def xgbcv(max_depth, learning_rate, n_estimators, gamma, min_child_weight, subsample, colsample_bytree, colsample_bylevel, reg_alpha, reg_lambda, scale_pos_weight):
    xgbcl = xgb.XGBClassifier(max_depth = int(max_depth), 
                            learning_rate = learning_rate, 
                            n_estimators = int(n_estimators), 
                            objective='binary:logistic', 
                            booster='gbtree', 
                            gamma=gamma, 
                            min_child_weight=int(min_child_weight),  
                            subsample=subsample, 
                            colsample_bytree=colsample_bytree, 
                            colsample_bylevel=colsample_bylevel, 
                            reg_alpha=reg_alpha, 
                            reg_lambda=reg_lambda, 
                            scale_pos_weight=scale_pos_weight)
    return cross_val_score(xgbcl, breast_cancer.data, breast_cancer.target, scoring = 'roc_auc', cv = 5).mean()
#设置bayesopt， tuple里为最小值和最大值
bo_xgb = BayesianOptimization(xgbcv,
                          {'max_depth': (3,9),
                           'learning_rate':(0.01,0.1),
                           'n_estimators':(100,500),
                           'gamma':(0,0.1),
                           'min_child_weight':(1,20),
                           'subsample':(0.6,1),
                           'colsample_bytree':(0.6,1),
                           'colsample_bylevel':(0.6,1),
                           'reg_alpha':(0,1),
                           'reg_lambda':(0,10),
                           'scale_pos_weight':(1,3)})
#高斯过程参数
gp_params = {'kernel': None,
             'alpha': 1e-5}
#优化
bo_xgb.maximize(n_iter=50, acq='ei', **gp_params)
#结果,最大值，最大值参数
print(bo_xgb.res)
print(bo_xgb.res['max'])
print(bo_xgb.res['max']['max_params'])

[31mInitialization[0m
[94m--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   colsample_bylevel |   colsample_bytree |     gamma |   learning_rate |   max_depth |   min_child_weight |   n_estimators |   reg_alpha |   reg_lambda |   scale_pos_weight |   subsample | 
    1 | 00m00s | [35m   0.99213[0m | [32m             0.8438[0m | [32m            0.6126[0m | [32m   0.0364[0m | [32m         0.0515[0m | [32m     6.4537[0m | [32m            5.0823[0m | [32m      425.6906[0m | [32m     0.9779[0m | [32m      6.5300[0m | [32m            2.1251[0m | [32m     0.9705[0m | 
    2 | 00m00s |    0.98733 |              0.9935 |             0.8055 |    0.0308 |          0.0196 |      3.1717 |            14.8735 |       241.9377 |      0.6225 |       8.6234 |             1.00

   33 | 00m02s |    0.98942 |              0.6869 |             1.0000 |    0.1000 |          0.1000 |      3.9388 |            12.8734 |       359.3007 |      0.4571 |       6.5938 |             1.3771 |      1.0000 | 
   34 | 00m02s |    0.98837 |              0.8288 |             0.9395 |    0.0691 |          0.0658 |      3.8835 |            18.5965 |       399.7859 |      0.5434 |       6.0761 |             2.2247 |      0.9709 | 
   35 | 00m03s |    0.98585 |              0.9001 |             0.8690 |    0.0738 |          0.0659 |      3.9044 |            18.3083 |       400.1015 |      0.4308 |       5.8815 |             1.8349 |      0.7296 | 
   36 | 00m03s |    0.98748 |              0.8265 |             0.9572 |    0.0678 |          0.0951 |      3.6268 |            17.9958 |       399.3767 |      0.3560 |       6.1127 |             1.8839 |      0.8037 | 
   37 | 00m03s |    0.98862 |              0.8066 |             0.9699 |    0.0739 |          0.0939 |      3.7655 |    

In [37]:
#以lr模型为例，说明几种搜索策略
#定义提取函数的目标就是为了有目的的去选取采样点，显然，提取的时候有两个方向：
#1. explore，尽可能的探索未知的空间，这样对f(x)的后验概率才会更接近f(x)
#2. exploit，强化已有的结果，在现有最大值的附近进行探索，保证找到的f(x)会更大

In [38]:
#1、probability of improvement(POI)
#这种方法考虑的是让新的采样能提升最大值的概率最大，值得注意的是，PI使用的是一种贪心搜索的策略，因此一定程度上说更像是局部搜索。
#xi较小时，倾向于exploit，当xi较大时，倾向于explore
#可以看到xi取小值时，c在最大点处浮动
bo.maximize(init_points=2, n_iter=5, acq="poi", xi=1e-4, **gp_params)

[31mBayesian Optimization[0m
[94m-----------------------------------------[0m
 Step |   Time |      Value |         c | 
   16 | 00m14s |    0.99259 |    1.1211 | 
   17 | 00m10s |    0.99259 |    1.1211 | 
   18 | 00m10s |    0.99259 |    1.1211 | 
   19 | 00m12s |    0.99259 |    1.1211 | 
   20 | 00m14s |    0.99259 |    1.1212 | 


In [39]:
#可以看到xi取小值时，c在新空间搜索
bo.maximize(init_points=2, n_iter=5, acq="poi", xi=0.1, **gp_params)

[31mBayesian Optimization[0m
[94m-----------------------------------------[0m
 Step |   Time |      Value |         c | 
   21 | 00m00s |    0.99259 |    1.5474 | 
   22 | 00m00s |    0.99234 |    5.0000 | 
   23 | 00m00s |    0.99259 |    1.0000 | 
   24 | 00m00s |    0.99234 |    4.7755 | 
   25 | 00m00s |    0.99259 |    1.5278 | 


In [None]:
#2、使用EI作为acquisition function是一个在explore和exploit之间平衡的一个不错选择。
#explore时，应该选择那些具有比较大方差的点，而在exploit时，则应该优先考虑均值大的点。
#xi较小时，倾向于exploit，当xi较大时，倾向于explore
#参数xi通常可以固定为0.01
bo.maximize(init_points=2, n_iter=25, acq="ei", xi=1e-4, **gp_params)
bo.maximize(init_points=2, n_iter=25, acq="ei", xi=0.1, **gp_params)

In [43]:
#3、除了EI和POI，有一个更简单的想法，直接比较置信空间中的最大值
#这种做法比较的是置信区间内的最大值，尽管看起来简单，但是实际效果却意外的好
#kappa取较小值时，倾向于exploit，当kappa较大时，倾向于explore
bo.maximize(init_points=2, n_iter=5, acq="ucb", kappa=0.1, **gp_params)

[31mBayesian Optimization[0m
[94m-----------------------------------------[0m
 Step |   Time |      Value |         c | 
   61 | 00m04s |    0.99259 |    1.1247 | 
   62 | 00m05s |    0.99259 |    1.1227 | 
   63 | 00m04s |    0.99259 |    1.1205 | 
   64 | 00m04s |    0.99259 |    1.1192 | 
   65 | 00m04s |    0.99259 |    1.1181 | 


In [42]:
bo.maximize(init_points=2, n_iter=5, acq="ucb", kappa=10, **gp_params)

[31mBayesian Optimization[0m
[94m-----------------------------------------[0m
 Step |   Time |      Value |         c | 
   56 | 00m11s |    0.99234 |    4.4198 | 
   57 | 00m10s |    0.99240 |    2.4651 | 


   60 | 00m10s |    0.99234 |    5.0000 | 


In [None]:
#混合使用几个不同的acquisition函数，（类似boost算法）得到了意外的好的结果。