In [None]:
import sklearn.datasets
from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier
from skopt.optimizer import Optimizer
from skopt.space import Integer,Real
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings("ignore")

In [None]:
# Load the OpenML "credit-g" dataset. as_frame=True is requested explicitly so
# that `data.frame` is a pandas DataFrame regardless of the installed
# scikit-learn's default for that argument.
data = fetch_openml(name='credit-g', as_frame=True)

# Prediction target; it is integer-encoded like the categorical features but is
# never passed to the model as a feature.
target_col = 'class'

# Feature columns that get integer-encoded and are later handed to LightGBM as
# `categorical_feature`. Each name appears exactly once.
cat_cols = ['checking_status', 'credit_history', 'purpose', 'savings_status', 'personal_status',
            'other_parties', 'employment', 'property_magnitude', 'other_payment_plans',
            'housing', 'existing_credits', 'job']

# Encode on a copy so the frame held by `data` keeps its original values.
df = data.frame.copy()
for col in cat_cols + [target_col]:
    # pd.factorize returns (codes, uniques); only the integer codes are kept.
    df[col] = pd.factorize(df[col])[0]

x_cols = [col for col in df.columns if col != target_col]
x = df[x_cols]
y = df[target_col]

In [None]:
def myscore(y_pred,y):
    """Return the ROC AUC of the predicted scores ``y_pred`` against labels ``y``.

    Note the argument order: predictions first, ground truth second, which is
    the reverse of ``roc_auc_score``.
    """
    auc = roc_auc_score(y_true=y, y_score=y_pred)
    return auc

def f(para,seed=0):
    """Train a LightGBM classifier with the given hyper-parameters and score it.

    Reads the module-level ``x``, ``y`` and ``cat_cols``.

    Parameters
    ----------
    para : sequence with at least 5 entries
        ``[num_leaves, n_estimators, min_child_samples, subsample,
        colsample_bytree]``, i.e. one value per dimension of ``space``.
    seed : int, default 0
        Used both for the 70/30 train/test split and as the model's
        ``random_state``.

    Returns
    -------
    float
        ROC AUC of the predicted probability of class 1 on the held-out 30%.
    """
    leaves, n_estimators, child, subsample, colsample = para[0], para[1], para[2], para[3], para[4]
    lr = 0.1  # the learning rate is held fixed; it is not part of the search space

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=seed)
    model = LGBMClassifier(
        num_leaves=leaves,
        learning_rate=lr,
        n_estimators=n_estimators,
        min_child_samples=child,
        subsample=subsample,
        # LightGBM only performs row subsampling when the bagging frequency is
        # non-zero. With the default of 0 the tuned `subsample` value would be
        # ignored, so bagging is enabled on every iteration.
        subsample_freq=1,
        colsample_bytree=colsample,
        random_state=seed,
        n_jobs=-1,
    )
    model.fit(x_train, y_train, categorical_feature=cat_cols)

    positive_proba = model.predict_proba(x_test)[:, 1]
    return myscore(positive_proba, y_test.values)

# Search space: one dimension per entry of `para` as unpacked inside `f`.
space = [
    Integer(4, 100),  # para[0] -> num_leaves
    Integer(1, 100),  # para[1] -> n_estimators
    Integer(1, 100),  # para[2] -> min_child_samples
    Real(0.1, 1),     # para[3] -> subsample
    Real(0.1, 1),     # para[4] -> colsample_bytree
]
def ask(history,base_estimator,acqfunc, n_suggest):
    """Build a fresh skopt ``Optimizer``, replay ``history`` into it and ask for new points.

    Parameters
    ----------
    history : list of (config, score) pairs
        Previously evaluated points; ``score`` is a value to be maximised.
    base_estimator : str
        Surrogate model name forwarded to ``Optimizer``.
    acqfunc : str
        Acquisition function name forwarded to ``Optimizer``.
    n_suggest : int
        Number of points requested from ``Optimizer.ask``.

    Returns
    -------
    Whatever ``Optimizer.ask(n_suggest)`` returns for the replayed optimizer.
    """
    opt = Optimizer(
        space,
        base_estimator=base_estimator,
        n_initial_points=1,
        initial_point_generator='random',
        acq_optimizer="auto",
        random_state=0,
        acq_func=acqfunc,
    )

    final_pos = len(history) - 1
    for pos, (config, score) in enumerate(history):
        # Scores are negated because the optimizer minimises its objective.
        # The surrogate is only fitted when the last observation is told.
        opt.tell(config, -score, fit=(pos == final_pos))

    return opt.ask(n_suggest)

In [None]:
# Benchmark every fixed (surrogate, acquisition function) pair.
N_suggest = 3   # points proposed and evaluated per BO round
N_round = 50    # BO rounds per run
init_num = 10   # random initial evaluations per run
oplist = ['GP','RF','ET','GBRT']
acqfunclist = ['LCB','EI','PI','gp_hedge']

# (surrogate, acquisition) -> list with the best AUC of each seeded run
score_dict = {}
for test_bo in oplist:
    for acf in acqfunclist:
        score_dict[(test_bo, acf)] = []
        for allseed in range(10):
            history = []

            # This optimizer only supplies the random initial design; it is
            # never told any result.
            optimizer = Optimizer(space, base_estimator='GP', n_initial_points=init_num,
                                  initial_point_generator='random',
                                  acq_optimizer="auto", random_state=allseed,
                                  acq_func='EI',
                                  )
            for _ in range(init_num):
                tmp_ask = optimizer.ask()
                tmp_score = f(tmp_ask, allseed)
                history.append((tmp_ask, tmp_score))

            for _ in tqdm(range(N_round)):
                tmp_ask_list = ask(history, base_estimator=test_bo, acqfunc=acf, n_suggest=N_suggest)
                # Score with the run's own seed so every entry in `history`
                # comes from the same train/test split as the initial design.
                tmp_score_list = [f(tmp_ask, allseed) for tmp_ask in tmp_ask_list]
                history += list(zip(tmp_ask_list, tmp_score_list))

            # Report the best score observed during the whole run, not just the
            # best of the final batch.
            best = max(run_score for _config, run_score in history)
            score_dict[(test_bo, acf)].append(best)
            print(f"{test_bo}/{acf} seed={allseed}: best AUC {best:.4f}")
            

In [None]:
epsilon = 0.05   # lower bound on the sampling weight of a pair that has recorded scores
init_std = 2.0   # sampling weight of a pair with no recorded scores
stable = 0.1     # probability that a sampled pair is kept as-is instead of being recombined
waterline = 1    # offset subtracted from (best recorded score - mean history score)
N_group = 4      # number of pairs sampled before recombination
def get_bo(mydict,history,n_suggest):
    """Pick ``n_suggest`` (base_estimator, acq_func) pairs with distinct base estimators.

    Steps:
      1. Give every key of ``mydict`` a sampling weight: ``init_std`` when it
         has no recorded scores, otherwise
         ``max(epsilon, (max(scores) - mean(history) - waterline) / std(history))``.
      2. Draw ``N_group`` pairs by roulette-wheel sampling on those weights.
      3. With probability ``stable`` keep a drawn pair unchanged; otherwise
         recombine it with every later drawn pair by swapping acquisition
         functions.
      4. Keep the first candidate seen for each base estimator and top up
         with further roulette-wheel draws until ``n_suggest`` pairs exist.

    Parameters
    ----------
    mydict : dict mapping (base_estimator, acq_func) -> list of scores
    history : list of (config, score) pairs
    n_suggest : int

    Returns
    -------
    list of (base_estimator, acq_func) tuples, exactly ``n_suggest`` long.

    Raises
    ------
    ValueError
        If ``n_suggest`` exceeds the number of distinct base estimators in
        ``mydict``, because the distinct-estimator requirement cannot be met.
    """
    if n_suggest <= 0:
        return []

    bo_acf_list = list(mydict)
    n_bases = len({pair[0] for pair in bo_acf_list})
    if n_suggest > n_bases:
        raise ValueError(
            "n_suggest=%d exceeds the %d distinct base estimators available"
            % (n_suggest, n_bases)
        )

    score = [h[1] for h in history]
    base = np.mean(score)
    base_std = np.std(score)

    prob = []
    for idx in bo_acf_list:
        past = mydict[idx]
        if len(past) == 0:
            prob.append(init_std)
        elif base_std > 0:
            # NOTE(review): if the recorded values are scores in [0, 1] and
            # waterline stays at 1, this quotient is never positive and the
            # weight is always `epsilon` -- confirm the intended formula.
            prob.append(max(epsilon, (np.max(past) - base - waterline) / base_std))
        else:
            # Zero (or undefined) spread in the history: fall back to the floor
            # weight instead of dividing by it.
            prob.append(epsilon)
    prob = np.array(prob, dtype=float)
    prob /= float(np.sum(prob))
    prob_cum = np.cumsum(prob)

    def _draw():
        # One roulette-wheel draw: the first index whose cumulative weight is
        # >= the uniform sample. The clamp covers cumulative sums that end
        # marginally below 1.0 because of rounding.
        pos = int(np.searchsorted(prob_cum, np.random.random()))
        return bo_acf_list[min(pos, len(bo_acf_list) - 1)]

    group = [_draw() for _ in range(N_group)]

    # A dict is used as an insertion-ordered set so the result depends only on
    # the NumPy random state, not on string hashing.
    candidates = {}
    for i in range(N_group):
        if np.random.random() < stable:
            candidates[group[i]] = None
        else:
            for j in range(i + 1, N_group):
                candidates[(group[i][0], group[j][1])] = None
                candidates[(group[j][0], group[i][1])] = None

    his_set = set()   # base estimators already chosen
    re_suggest = []
    for pair in candidates:
        if len(re_suggest) == n_suggest:
            break
        if pair[0] not in his_set:
            his_set.add(pair[0])
            re_suggest.append(pair)

    # Top up with fresh draws; this terminates because n_suggest <= n_bases.
    while len(re_suggest) < n_suggest:
        pair = _draw()
        if pair[0] not in his_set:
            his_set.add(pair[0])
            re_suggest.append(pair)
    return re_suggest

In [None]:
# Benchmark the adaptive strategy: every round `get_bo` chooses which
# (surrogate, acquisition function) pairs propose the next points.
np.random.seed(0)  # get_bo samples through NumPy's global random state
myalgo_score = []  # best AUC of each seeded run
oplist = ['GP','RF','ET','GBRT']
acqfunclist = ['LCB','EI','PI','gp_hedge']
for allseed in range(10):

    history = []
    # Scores obtained so far by each (surrogate, acquisition) pair in this run.
    myalgo_score_dict = {
        (op, ac): [] for op in oplist for ac in acqfunclist
    }

    # This optimizer only supplies the random initial design; it is never told
    # any result.
    optimizer = Optimizer(space, base_estimator='GP', n_initial_points=init_num,
                          initial_point_generator='random',
                          acq_optimizer="auto", random_state=allseed,
                          acq_func='EI',
                          )
    for _ in range(init_num):
        tmp_ask = optimizer.ask()
        tmp_score = f(tmp_ask, allseed)
        history.append((tmp_ask, tmp_score))

    for _ in tqdm(range(N_round)):
        res = get_bo(myalgo_score_dict, history, N_suggest)
        # All proposals of a round are generated from the same history.
        tmp_ask_list = [
            ask(history, base_estimator=bo, acqfunc=acf, n_suggest=1)[0]
            for bo, acf in res
        ]
        # Score with the run's own seed so every entry in `history` comes from
        # the same train/test split as the initial design.
        tmp_score_list = [f(tmp_ask, allseed) for tmp_ask in tmp_ask_list]

        for pair, tmp_ask, tmp_score in zip(res, tmp_ask_list, tmp_score_list):
            history.append((tmp_ask, tmp_score))
            # Feed the result back so get_bo can weight pairs by their scores;
            # without this the dict stays empty and selection never adapts.
            myalgo_score_dict[pair].append(tmp_score)

    # Report the best score observed during the whole run, not just the best of
    # the final batch.
    myalgo_score.append(max(run_score for _config, run_score in history))
    
