# Hyper Parameter Tuning
- hyper parameter: 모델 설정과 관련해 직접 지정할 수 있는 매개변수
- model parameter: 회귀계수(가중치), 절편 등 모델의 학습 대상이 되는 변수

### GridSearchCV


In [6]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# 데이터 로드
iris_input, iris_target = load_iris(return_X_y=True)

# 모델 생성
knn = KNeighborsClassifier()

# 테스트할 파라미터 값
params = {
    'n_neighbors': range(1, 13, 2)
}

# 첫 번째 인자: 모델
# 두 번째 인자: 테스트 할 파라미터 (딕셔너리)
# scoring: 평가 지표 (accuracy, precision, recall, f1)
# cv: 반복 횟수 (KFold 사용, StratifiedKFold 객체 전달 가능)
grid = GridSearchCV(knn, params, scoring='accuracy', cv=5)
grid.fit(iris_input, iris_target)

print('최적의 파라미터:', grid.best_params_)
print('최적화된 모델 객체:', grid.best_estimator_)
print('최적화된 점수:', grid.best_score_)

최적의 파라미터: {'n_neighbors': 7}
최적화된 모델 객체: KNeighborsClassifier(n_neighbors=7)
최적화된 점수: 0.9800000000000001


In [7]:
best_knn = grid.best_estimator_
best_knn.fit(iris_input, iris_target)
best_knn.score(iris_input, iris_target)

0.9733333333333334

### RandomSearchCV
- 하이퍼 파라미터의 값 목록이나 값의 범위를 제공하는데, 이 범위 중에 랜덤하게 값을 뽑아내 최적의 하이퍼 파라미터 조합을 찾는다.
    - 탐색범위가 넓을 때 짧은 시간 내에 좋은 결과를 얻을 수 있다.
    - 랜덤하게 값을 추출해 계산하므로, 전역 최적값을 놓칠 수 있다.

In [8]:
from sklearn.model_selection import RandomizedSearchCV

# 모델 생성
knn = KNeighborsClassifier()

# 테스트할 파라미터 생성
params = {
    'n_neighbors': range(1, 100, 2)  
}

# n_iter: 탐색할 최적의 하이퍼 파라미터 조합 수 (기본값: 10)
#         값이 크면 시간이 오래 걸림 / 값이 작으면 좋은 조합을 찾을 가능성 저하
rd_search = RandomizedSearchCV(knn, params, cv=5, n_iter=10, random_state=0)
rd_search.fit(iris_input, iris_target)

print('최적의 파라미터:', rd_search.best_params_)
print('최적화된 모델 객체:', rd_search.best_estimator_)
print('최적화된 파라미터:', rd_search.best_score_)
rd_search.cv_results_

최적의 파라미터: {'n_neighbors': 5}
최적화된 모델 객체: KNeighborsClassifier()
최적화된 파라미터: 0.9733333333333334


{'mean_fit_time': array([0.00096965, 0.00093102, 0.00077868, 0.00074735, 0.00074139,
        0.0008224 , 0.00072551, 0.00070615, 0.00086832, 0.00070643]),
 'std_fit_time': array([2.34508543e-04, 3.19760577e-04, 1.25891060e-04, 1.29107885e-04,
        6.28699622e-05, 1.15810641e-04, 1.22662686e-04, 6.65172283e-05,
        8.01972909e-05, 7.20133047e-05]),
 'mean_score_time': array([0.00453033, 0.00320964, 0.00308061, 0.01420403, 0.00333223,
        0.00353422, 0.00213704, 0.00199928, 0.00366817, 0.00341754]),
 'std_score_time': array([0.00174211, 0.00041112, 0.00029284, 0.02455367, 0.0002837 ,
        0.00029125, 0.0004465 , 0.00035921, 0.00034197, 0.0003337 ]),
 'param_n_neighbors': masked_array(data=[57, 23, 21, 83, 5, 55, 77, 63, 45, 9],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value=999999),
 'params': [{'n_neighbors': 57},
  {'n_neighbors': 23},
  {'n_neighbors': 21},
  {'n_neighbors': 83},
  {'n_nei

---

### HyperOpt

In [9]:
!pip3 install Hp


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m


In [10]:
import hyperopt
from hyperopt import hp
# 검색 공간
search_space = {
    'x': hp.quniform('x', -10, 10, 1),
    'y': hp.quniform('y', -15, 15, 1)
}

In [11]:
# 목적 함수
def objective(search_space):
    x = search_space['x']
    y = search_space['y']
    return {
        'loss': x**2 + 20 * y,
        'status': hyperopt.STATUS_OK}

In [12]:
from hyperopt import fmin, tpe, Trials

# 탐색 과정을 저장하는 객체
trials = Trials()

# fmin() : 목적 함수의 최소값을 찾는 함수
fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=500,
    trials=trials
)

100%|██████████| 500/500 [00:08<00:00, 56.23trial/s, best loss: -300.0] 


{'x': np.float64(-0.0), 'y': np.float64(-15.0)}

In [13]:
#!pip3 install --upgrade xgboost

- hyperopt를 활용한 XGBoost 하이퍼 파라미터 튜닝

In [14]:
from xgboost import XGBClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=42)

# 1. 검색 공간
search_space = {
    'n_estimators': hp.quniform('n_estimators', 100, 500, 100),
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1)
}

# 2. 목적 함수
def xgb_objective(ss):
    xgb_clf = XGBClassifier(
        n_estimators=int(ss['n_estimators']),
        max_depth=int(ss['max_depth']),
        learning_rate=ss['learning_rate'],
        colsample_bytree=ss['colsample_bytree']    
    )
    mean_acc = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3).mean()
    return {
        'loss': -1 * mean_acc,
        'status': hyperopt.STATUS_OK
    }

# 3. Trials() + fmin()
trials = Trials()
best = fmin(
    fn=xgb_objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials
)

best

100%|██████████| 50/50 [00:29<00:00,  1.67trial/s, best loss: -0.9671361502347419]


{'colsample_bytree': np.float64(0.5057484080470731),
 'learning_rate': np.float64(0.03042195517923342),
 'max_depth': np.float64(3.0),
 'n_estimators': np.float64(300.0)}

---


In [15]:
# 탐색과정 -> 목적함수 반환값 (loss의 실행 상태 저장)
trials.results

# 탐색과정 -> 하이퍼 파라미터값을 딕셔너리 형태로 저장
trials.vals

{'colsample_bytree': [np.float64(0.8277375223786566),
  np.float64(0.8817568773496226),
  np.float64(0.5687404394272759),
  np.float64(0.7798737435229448),
  np.float64(0.7791438032256472),
  np.float64(0.7900189490001761),
  np.float64(0.5057484080470731),
  np.float64(0.6364175808400087),
  np.float64(0.8680915323231867),
  np.float64(0.9501029761538042),
  np.float64(0.9228091349460845),
  np.float64(0.9997223091629328),
  np.float64(0.8665202116474617),
  np.float64(0.5967686111552134),
  np.float64(0.7431658692595542),
  np.float64(0.9351466393343004),
  np.float64(0.8509254304504665),
  np.float64(0.8502372939540088),
  np.float64(0.5596252063032483),
  np.float64(0.6176276009664499),
  np.float64(0.6692292059993865),
  np.float64(0.5201195653575879),
  np.float64(0.9819883495175155),
  np.float64(0.7108975703695168),
  np.float64(0.5124160641997955),
  np.float64(0.7059280029131089),
  np.float64(0.9607334037528671),
  np.float64(0.6553733847595331),
  np.float64(0.8141785804111

### Optuna

In [16]:
# !pip3 install Optuna

In [17]:
import optuna

# 목적 함수
def objective(trial):
    x = trial.suggest_uniform('x', -10, 10)
    y = trial.suggest_uniform('y', -15, 15)
    return (x - 3) ** 2 + (y + 5) ** 2

# 스터디 생성
study = optuna.create_study(direction='minimize')

# 최적화 실행
study.optimize(objective, n_trials=500)

# 결과 확인
print(study.best_value)
print(study.best_params)

[I 2025-02-11 17:16:58,077] A new study created in memory with name: no-name-6e5e689c-6a36-428b-a485-606c4a18e200
  x = trial.suggest_uniform('x', -10, 10)
  y = trial.suggest_uniform('y', -15, 15)
[I 2025-02-11 17:16:58,080] Trial 0 finished with value: 114.85903067557416 and parameters: {'x': -0.041814917616873615, 'y': 5.276497101762722}. Best is trial 0 with value: 114.85903067557416.
[I 2025-02-11 17:16:58,082] Trial 1 finished with value: 49.6189973747138 and parameters: {'x': 9.294370563378433, 'y': -1.8377386911279974}. Best is trial 1 with value: 49.6189973747138.
[I 2025-02-11 17:16:58,089] Trial 2 finished with value: 276.1871031239545 and parameters: {'x': 4.988811911626186, 'y': 11.49944636356409}. Best is trial 1 with value: 49.6189973747138.
[I 2025-02-11 17:16:58,090] Trial 3 finished with value: 90.81127158157648 and parameters: {'x': -6.529414959424955, 'y': -5.0390142623198795}. Best is trial 1 with value: 49.6189973747138.
[I 2025-02-11 17:16:58,094] Trial 4 finishe

0.0001587544180381312
{'x': 2.993325202268755, 'y': -5.010686509873906}


In [18]:
import optuna.visualization as vis
vis.plot_param_importances(study).show()

In [19]:

vis.plot_optimization_history(study).show()

- Optuna를 활용한 XGBoost 하이퍼 파라미터 튜닝

In [26]:
# 1. 목적 함수
def xgb_optuna_objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)    
    }

    xgb_clf = XGBClassifier(**params)
    return cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3).mean

# 2. study 객체 -> 최적화
study = optuna.create_study(direction='maximize')
study.optimize(xgb_optuna_objective, n_trials=50)


# 3. 결과 출력
print(study.best_params)
print(study.best_value)


[I 2025-02-11 17:22:41,188] A new study created in memory with name: no-name-b15952d0-7d06-409f-9190-8ae8719b0115



suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.

[W 2025-02-11 17:22:41,493] Trial 0 failed with parameters: {'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.07681268322739324, 'colsample_bytree': 0.7136522256956832} because of the following error: The value <built-in method mean of numpy.ndarray object at 0x121fec7b0> could not be cast to float.
[W 2025-02-11 17:22:41,494] Trial 0 failed with value <built-in method mean of numpy.ndarray object at 0x121fec7b0>.

suggest_int() got {'step'} as positional arguments but they were expected to be given as keyword arguments.

[W 2025-02-11 17:22:41,821] Trial 1 failed with parameters: {'n_estimators': 200, 'max_depth': 6, 'learning_rate': 0.08387103221864273, 'colsample_bytree': 0.9856329213938652} because of the following error: The value <built-in method mean of numpy.ndarray object at 0x121fecff0> could not be cast to float.
[W 2025-02-11 17:22:41,822] Trial 1 failed with

ValueError: No trials are completed yet.

##### HyperOpt vs Optuna

In [25]:
from sklearn.metrics import accuracy_score

xgb_hpopt = XGBClassifier(
    n_estimators=400,
    max_depth=10,
    learning_rate=0.11,
    colsample_bytree=0.6
)

xgb_optuna = XGBClassifier(
    n_estimators=500,
    max_depth=7,
    learning_rate=0.2,
    colsample_bytree=0.55    
)

xgb_hpopt.fit(X_train, y_train)
xgb_optuna.fit(X_train, y_train)

hpopt_pred = xgb_hpopt.predict(X_test)
optuna_pred = xgb_optuna.predict(X_test)

print(f'HyperOpt 최적 파라미터 적용: {accuracy_score(y_test, hpopt_pred)}')
print(f'Optuna 최적 파라미터 적용: {accuracy_score(y_test, optuna_pred)}')

HyperOpt 최적 파라미터 적용: 0.958041958041958
Optuna 최적 파라미터 적용: 0.972027972027972
