In [1]:
# !pip install hyperopt

In [2]:
from hyperopt import hp

search_space = {
    'x': hp.quniform('x', -10, 10, 1),
    'y': hp.quniform('y', -15, 15, 1)
}

In [3]:
import hyperopt

# 목적 함수
def objective(search_space):
    x = search_space['x']
    y = search_space['y']
    return {
        'loss': x**2 + 20 * y,
        'status': hyperopt.STATUS_OK
    }

In [4]:
from hyperopt import fmin, tpe, Trials

# 탐색 과정을 저장하는 객체
trials = Trials()

# fmin() : 목적 함수의 최소값을 찾는 함수
best_val = fmin(
    fn=objective,       # 목적 함수
    space=search_space,  # 검색 공간
    algo=tpe.suggest,   # 베이지안 최적화 적용
    max_evals=500,      # 반복 횟수
    trials=trials       # 탐색과정 저장
)
best_val


100%|██████████| 500/500 [00:05<00:00, 85.17trial/s, best loss: -300.0] 


{'x': -0.0, 'y': -15.0}

In [5]:
# 탐색과정 -> 목적함수 반환값 (loss와 실행 상태) 저장
trials.results

# 탐색과정 -> 하이퍼 파라미터값을 딕셔너리(리스트) 형태로 저장
trials.vals

{'x': [3.0,
  -2.0,
  8.0,
  9.0,
  5.0,
  6.0,
  -3.0,
  6.0,
  -3.0,
  -2.0,
  4.0,
  -10.0,
  -7.0,
  1.0,
  -10.0,
  -2.0,
  -5.0,
  -1.0,
  -10.0,
  -7.0,
  9.0,
  1.0,
  1.0,
  2.0,
  2.0,
  -0.0,
  3.0,
  3.0,
  3.0,
  7.0,
  4.0,
  -5.0,
  7.0,
  5.0,
  10.0,
  0.0,
  2.0,
  8.0,
  5.0,
  -1.0,
  -4.0,
  -1.0,
  -8.0,
  -3.0,
  -1.0,
  -5.0,
  -4.0,
  -2.0,
  2.0,
  -8.0,
  -6.0,
  -1.0,
  6.0,
  4.0,
  1.0,
  -3.0,
  -2.0,
  -9.0,
  -4.0,
  0.0,
  2.0,
  -6.0,
  1.0,
  4.0,
  9.0,
  2.0,
  6.0,
  5.0,
  2.0,
  3.0,
  -0.0,
  7.0,
  3.0,
  4.0,
  -1.0,
  1.0,
  -0.0,
  5.0,
  2.0,
  -2.0,
  -3.0,
  3.0,
  8.0,
  1.0,
  -1.0,
  10.0,
  -3.0,
  -4.0,
  6.0,
  2.0,
  -6.0,
  4.0,
  -2.0,
  -0.0,
  1.0,
  7.0,
  -1.0,
  5.0,
  3.0,
  4.0,
  -5.0,
  -2.0,
  8.0,
  6.0,
  2.0,
  1.0,
  -3.0,
  -7.0,
  -4.0,
  -1.0,
  0.0,
  1.0,
  -5.0,
  0.0,
  7.0,
  3.0,
  1.0,
  -8.0,
  -1.0,
  -2.0,
  -3.0,
  5.0,
  -5.0,
  2.0,
  3.0,
  -4.0,
  -9.0,
  -6.0,
  9.0,
  4.0,
  -2.0,
  0.0,
  1.0,


- hyperopt를 활용한 XGBoost 하이퍼 파라미터 튜닝

In [6]:
from xgboost import XGBClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=42)

# 1. 검색 공간
search_space = {
    'n_estimators': hp.quniform('n_estimators', 100, 500, 100),
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
}

# 2. 목적 함수
def xgb_objective(ss):
    xgb_clf = XGBClassifier(
        n_estimators=int(ss['n_estimators']),
        max_depth=int(ss['max_depth']),
        learning_rate=ss['learning_rate'], 
        colsample_bytree=ss['colsample_bytree']
    )
    mean_acc = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3).mean()
    return {
        'loss': -1 * mean_acc,
        'status': hyperopt.STATUS_OK
    }

# 3. Trials() + fmin()
trials = Trials()
best = fmin(
    fn=xgb_objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials
)

best

100%|██████████| 50/50 [00:12<00:00,  3.95trial/s, best loss: -0.9741784037558686]


{'colsample_bytree': 0.5227845834330785,
 'learning_rate': 0.19706220276856828,
 'max_depth': 4.0,
 'n_estimators': 500.0}

In [7]:
import xgboost
import sklearn
print(xgboost.__version__)
print(sklearn.__version__)

2.1.4
1.6.1


In [8]:
!pip install --upgrade xgboost



In [11]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading SQLAlchemy-2.0.38-cp312-cp312-win_amd64.whl.metadata (9.9 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet!=0.4.17 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.1.1-cp312-cp312-win_amd64.whl.metadata (3.9 kB)
Downloading optuna-4.2.0-py3-none-any.whl (383 kB)
Downloading alembic-1.14.1-py3-none-any.whl (233 kB)
Downloading SQLAlchemy-2.0.38-cp312-cp312-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ---- ----------------------------------- 0.3/2.1 MB ? eta -:--:--
   --------- ------------------------------ 0.5/2.1 MB

In [14]:
import optuna

# 목적 함수
def objective(trial):
    x = trial.suggest_uniform('x', -10, 10)
    y = trial.suggest_uniform('y', -15, 15)

    return (x - 3) ** 2 + (y + 5) ** 2

# 스터디 생성
study = optuna.create_study(direction='minimize')

# 최적화 실행
study.optimize(objective, n_trials=500)

# 결과 확인
print(study.best_value)
print(study.best_params)

[I 2025-02-11 16:16:11,635] A new study created in memory with name: no-name-fabcf07d-5c2d-46ac-9d55-7d814b689a4c
  x = trial.suggest_uniform('x', -10, 10)
  y = trial.suggest_uniform('y', -15, 15)
[I 2025-02-11 16:16:11,638] Trial 0 finished with value: 84.40781447041014 and parameters: {'x': 5.888936389936877, 'y': -13.721345137380393}. Best is trial 0 with value: 84.40781447041014.
[I 2025-02-11 16:16:11,640] Trial 1 finished with value: 82.33142414653062 and parameters: {'x': -1.7658285105856368, 'y': -12.721288930885809}. Best is trial 1 with value: 82.33142414653062.
[I 2025-02-11 16:16:11,642] Trial 2 finished with value: 365.40878794624837 and parameters: {'x': -9.727282119090164, 'y': 9.262716354444414}. Best is trial 1 with value: 82.33142414653062.
[I 2025-02-11 16:16:11,643] Trial 3 finished with value: 74.73135138477046 and parameters: {'x': -2.2544594597202128, 'y': 1.8645471205992337}. Best is trial 3 with value: 74.73135138477046.
[I 2025-02-11 16:16:11,644] Trial 4 fin

0.0020041842035673136
{'x': 3.026047451778593, 'y': -5.036410361978552}


In [15]:
import optuna.visualization as vis

vis.plot_param_importances(study).show()

In [16]:
vis.plot_optimization_history(study).show()

- optuna를 활용한 XGBoost 하이퍼 파라미터 튜닝

In [18]:
# 1. 목적 함수
def xgb_optuna_objective(trial):
    params = {
        'n_estimators' : trial.suggest_int('n_estimators', 100, 500, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
    }

    xgb_clf = XGBClassifier(**params)
    return cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3).mean()

# 2. study 객체 > 최적화
study = optuna.create_study(direction='maximize')
study.optimize(xgb_optuna_objective, n_trials=50)

# 3. 결과 출력
print(study.best_params)
print(study.best_value)

[I 2025-02-11 17:12:54,173] A new study created in memory with name: no-name-a102e629-c905-42a6-b832-06d2a9158c4f

suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.

[I 2025-02-11 17:12:54,637] Trial 0 finished with value: 0.9647887323943661 and parameters: {'n_estimators': 500, 'max_depth': 9, 'learning_rate': 0.19118751047074353, 'colsample_bytree': 0.6176643520052345}. Best is trial 0 with value: 0.9647887323943661.

suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.

[I 2025-02-11 17:12:55,060] Trial 1 finished with value: 0.9624413145539906 and parameters: {'n_estimators': 500, 'max_depth': 8, 'learning_rate': 0.14471757082742995, 'colsample_bytree': 0.8753883989738347}. Best is trial 0 with value: 0.9647887323943661.

suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.

[I 2025-02-11 17:12:55,505] Trial 2 finish

{'n_estimators': 300, 'max_depth': 6, 'learning_rate': 0.14298643813537268, 'colsample_bytree': 0.5370066590293221}
0.971830985915493


##### HyperOpt vs Optuna

In [19]:
from sklearn.metrics import accuracy_score

xgb_hpopt = XGBClassifier(
    n_estimators = 400,
    max_depth = 10,
    learning_rate = 0.11,
    colsample_bytree = 0.6
)

xgb_optuna = XGBClassifier(
    n_estimators = 500,
    max_depth = 7,
    learning_rate = 0.2,
    colsample_bytree = 0.55
)

xgb_hpopt.fit(X_train, y_train)
xgb_optuna.fit(X_train, y_train)

hpopt_pred = xgb_hpopt.predict(X_test)
optuna_pred = xgb_optuna.predict(X_test)

print(f'HyperOpt 최적 파라미터 적용: {accuracy_score(y_test, hpopt_pred)}')
print(f'Optuna 최적 파라미터 적용: {accuracy_score(y_test, hpopt_pred)}')

HyperOpt 최적 파라미터 적용: 0.958041958041958
Optuna 최적 파라미터 적용: 0.958041958041958
