In [None]:
#################################################
# hyperopt 사용법
#################################################

# 1. Search space: each key maps to a hyperopt sampling expression.
#    Both variables are drawn with hp.quniform using a step (q) of 1.
from hyperopt import hp

search_space = dict(
    x=hp.quniform('x', -10, 10, 1),
    y=hp.quniform('y', -15, 15, 1),
)

# 2. 목적함수 생성
from hyperopt import STATUS_OK

def objective_func(search_space):
    """Evaluate the toy objective ``x**2 - 20*y`` at one sampled point.

    Args:
        search_space: Mapping that provides numeric values under the keys
            'x' and 'y'.

    Returns:
        The bare numeric value of ``x**2 - 20*y``.
    """
    x_val, y_val = search_space['x'], search_space['y']
    # Alternative return form: {'loss': <value>, 'status': STATUS_OK}
    return x_val ** 2 - 20 * y_val

# 3. Create the Trials object that records every evaluated input and its result
import numpy as np  # required by `rstate` below; the later data-loading cell imports it too late for Run All
from hyperopt import fmin, tpe, Trials

trial_val = Trials()

# 4. Search for the inputs that minimize the objective using 20 evaluations (max_evals=20)
best_01 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,  # TPE; effectively the only algorithm used in this notebook
    max_evals=20,
    trials=trial_val,
    # Fixed seed keeps the run reproducible. Original author's note: leaving the
    # seed unset tended to give slightly better results.
    rstate=np.random.default_rng(seed=0)
)
print('best: ', best_01)  # inputs for which the smallest loss was observed

# 5. Collect the per-trial inputs and losses into a DataFrame
print(trial_val.results)  # one result dict per evaluation
print(trial_val.vals)     # sampled values, grouped per variable

import pandas as pd

# Pull the 'loss' entry out of each result dict, in evaluation order.
losses = [trial_result['loss'] for trial_result in trial_val.results]

result_df = pd.DataFrame(dict(
    x=trial_val.vals['x'],
    y=trial_val.vals['y'],
    losses=losses,
))

result_df

In [None]:
#################################################
# hyperopt를 사용한 XGBOOST 하이퍼 파라미터 튜닝
#################################################

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Load scikit-learn's bundled breast cancer dataset.
dataset = load_breast_cancer()

# Feature columns first, with the label appended as the last column ('target').
cancer_df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
cancer_df['target'] = dataset.target

# Everything except the last column is a feature; the last column is the label.
X_features, y_label = cancer_df.iloc[:, :-1], cancer_df.iloc[:, -1]

# Hold out 20% of all rows as the test set; the remaining 80% is for training.
X_train, X_test, y_train, y_test = train_test_split(
    X_features, y_label, test_size=0.2, random_state=156
)

# Split the training portion again: 90% to fit on, 10% for validation.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=156
)

In [None]:
# 1. Search space for the XGBoost hyperparameters.
#    max_depth and min_child_weight use hp.quniform with a step of 1;
#    learning_rate and colsample_bytree use hp.uniform over a continuous range.
xgb_search_space = dict(
    max_depth=hp.quniform('max_depth', 5, 20, 1),
    min_child_weight=hp.quniform('min_child_weight', 1, 2, 1),
    learning_rate=hp.uniform('learning_rate', 0.01, 0.2),
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1),
)

# 2. 목적함수
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from hyperopt import STATUS_OK

def objective_func(search_space):
    """Score one XGBoost hyperparameter candidate for hyperopt's ``fmin``.

    Builds an ``XGBClassifier`` from the sampled values and evaluates it with
    3-fold cross-validation on the module-level ``X_train`` / ``y_train``.

    Args:
        search_space: Sampled values keyed by 'max_depth', 'min_child_weight',
            'learning_rate' and 'colsample_bytree'.

    Returns:
        dict with 'loss' (the negated mean cross-validation accuracy) and
        'status' (``STATUS_OK``).
    """
    xgb_clf = XGBClassifier(n_estimators=100,
                            # Cast to int: these two are sampled with hp.quniform
                            # in the search space defined in this notebook.
                            max_depth=int(search_space['max_depth']),
                            min_child_weight=int(search_space['min_child_weight']),
                            learning_rate=search_space['learning_rate'],
                            colsample_bytree=search_space['colsample_bytree'],
                            eval_metric='logloss')
    # Cross-validation is used because the dataset is small; it gives a more
    # stable estimate for each candidate than a single split would.
    accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3)
    # `accuracy` holds one score per fold (cv=3). fmin minimizes, so the mean
    # accuracy is multiplied by -1 to turn "higher is better" into a loss.
    return {'loss': -1 * np.mean(accuracy), 'status': STATUS_OK}

# 3. Run the TPE search with fmin()
from hyperopt import fmin, tpe, Trials

trial_val = Trials()  # records the sampled values and result of every evaluation

best = fmin(
    objective_func,
    xgb_search_space,
    algo=tpe.suggest,
    trials=trial_val,
    max_evals=50,
    rstate=np.random.default_rng(9),  # fixed seed for a reproducible search
)
print('best: ', best)

# 4. Train a final model with the hyperparameters found by the search
xgb_wrapper = XGBClassifier(
    n_estimators=400,
    max_depth=int(best['max_depth']),
    min_child_weight=int(best['min_child_weight']),
    learning_rate=round(best['learning_rate'], 5),
    colsample_bytree=round(best['colsample_bytree'], 5),
)

# Evaluation sets passed to fit(): the fitting split and the validation split.
evals = [(X_tr, y_tr), (X_val, y_val)]
# NOTE(review): passing early_stopping_rounds / eval_metric to fit() depends on the
# installed XGBoost version; newer releases expect them on the constructor.
# Confirm against the version this notebook targets.
xgb_wrapper.fit(
    X_tr,
    y_tr,
    eval_set=evals,
    eval_metric='logloss',
    early_stopping_rounds=50,
    verbose=True,
)

# Class predictions and the probability column at index 1 for the held-out test set.
preds = xgb_wrapper.predict(X_test)
pred_proba = xgb_wrapper.predict_proba(X_test)[:, 1]

# NOTE(review): get_clf_eval is not defined in the visible cells; it is presumably
# defined earlier in the notebook. Verify before running on a fresh kernel.
get_clf_eval(y_test, preds, pred_proba)

# 5. Collect the per-trial hyperparameters and losses into a DataFrame
# Each entry of trial_val.results is a dict; keep its 'loss' value (negated CV accuracy).
losses = [loss_dict['loss'] for loss_dict in trial_val.results]
result_df = pd.DataFrame({
    'max_depth' : trial_val.vals['max_depth'],
    'min_child_weight' : trial_val.vals['min_child_weight'],
    # The key must match the label used in xgb_search_space ('learning_rate');
    # the previous spelling 'learing_rate' raised KeyError.
    'learning_rate' : trial_val.vals['learning_rate'],
    'colsample_bytree' : trial_val.vals['colsample_bytree'],
    'losses' : losses
})

result_df