In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

# Load the scikit-learn breast-cancer dataset; features go into a DataFrame
# whose columns carry the dataset's feature names, labels stay a plain array.
data = load_breast_cancer()
y = data.target
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Hold out 20% of the rows as the test set (fixed seed so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Baseline: XGBoost classifier with default hyperparameters (only seed and
# eval metric are set), fitted on the training split.
base_model = XGBClassifier(eval_metric='logloss', random_state=42)
base_pred = base_model.fit(X_train, y_train).predict(X_test)

# Report the baseline test accuracy.
print("--- üèÅ ÌäúÎãù Ï†Ñ Î≤†Ïù¥Ïä§ÎùºÏù∏ Ï†êÏàò ---")
print(f"Ï†ïÌôïÎèÑ: {accuracy_score(y_test, base_pred):.4f}")

--- üèÅ ÌäúÎãù Ï†Ñ Î≤†Ïù¥Ïä§ÎùºÏù∏ Ï†êÏàò ---
Ï†ïÌôïÎèÑ: 0.9561


In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default hyperparameters (seeded), trained on the same
# split as the XGBoost baseline for comparison.
model_rf = RandomForestClassifier(random_state=42)
pred_rf = model_rf.fit(X_train, y_train).predict(X_test)

# Overall accuracy plus the per-class precision / recall / F1 table.
score_rf = accuracy_score(y_test, pred_rf)
report_rf = classification_report(y_test, pred_rf)

print(round(score_rf, 3))
print(report_rf)

0.965
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114



In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive grid: 3 values for each of 3 hyperparameters -> 27 candidates.
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.3],
    max_depth=[3, 5, 7],
)

# 3-fold cross-validated accuracy on the training split, using all CPU cores.
grid_search = GridSearchCV(
    estimator=XGBClassifier(eval_metric='logloss', random_state=42),
    param_grid=param_grid,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Best configuration and its mean cross-validation score.
print("\n--- üèÜ Grid Search Í≤∞Í≥º ---")
print(f"ÏµúÏ†ÅÏùò ÌååÎùºÎØ∏ÌÑ∞: {grid_search.best_params_}")
print(f"ÏµúÍ≥†Ïùò Ï†êÏàò(CV): {grid_search.best_score_:.4f}")

# Score the winning estimator once on the held-out test set.
best_model = grid_search.best_estimator_
final_pred = best_model.predict(X_test)
print(f"ÌÖåÏä§Ìä∏ÏÖã ÏµúÏ¢Ö Ï†ïÌôïÎèÑ: {accuracy_score(y_test, final_pred):.4f}")


Fitting 3 folds for each of 27 candidates, totalling 81 fits

--- üèÜ Grid Search Í≤∞Í≥º ---
ÏµúÏ†ÅÏùò ÌååÎùºÎØ∏ÌÑ∞: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}
ÏµúÍ≥†Ïùò Ï†êÏàò(CV): 0.9670
ÌÖåÏä§Ìä∏ÏÖã ÏµúÏ¢Ö Ï†ïÌôïÎèÑ: 0.9561


In [None]:
# 1. Install the library (run the line below if optuna is not installed)
# !pip install optuna 

import optuna
from sklearn.metrics import accuracy_score

# 2. Define the objective function that Optuna will optimize
def objective(trial):
    """Optuna objective: mean cross-validated accuracy of an XGBoost classifier.

    Samples one hyperparameter configuration from ``trial`` and scores it with
    3-fold cross-validation on ``X_train``/``y_train`` only. The test set is
    deliberately not used here: scoring trials on ``X_test`` would let the
    search pick hyperparameters that fit the test data, making the reported
    score optimistic.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        float: mean accuracy over the cross-validation folds (higher is better).
    """
    # Local import so this cell does not depend on a new top-of-notebook import.
    from sklearn.model_selection import cross_val_score

    # Search space for the hyperparameters being tuned.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),  # row sampling ratio
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)  # column sampling ratio
    }

    # Build the model for this trial.
    model = XGBClassifier(**params, random_state=42, eval_metric='logloss')

    # Validate on folds of the training data (cv=3 / accuracy, the same
    # protocol the grid-search cell uses) instead of on the test set.
    fold_scores = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy')

    return float(fold_scores.mean())

# 3. Create the study and run the optimization.
# A seeded TPE sampler makes the sequence of suggested hyperparameters
# repeatable across kernel restarts (the default sampler is unseeded).
study = optuna.create_study(
    direction='maximize',  # the objective returns a score to maximize
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50)  # run 50 trials

print("\n--- üß† Optuna Í≤∞Í≥º ---")
print(f"Í∞ÄÏû• Ï¢ãÏùÄ Ï†êÏàò: {study.best_value:.4f}")
print(f"Í∑∏ÎïåÏùò ÌååÎùºÎØ∏ÌÑ∞: {study.best_params}")

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-12-17 17:34:54,867] A new study created in memory with name: no-name-a14eb8fd-e035-43f2-bdee-09679257aa9a
[I 2025-12-17 17:34:55,134] Trial 0 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 256, 'max_depth': 7, 'learning_rate': 0.1668295070186331, 'subsample': 0.6421582021836753, 'colsample_bytree': 0.5643943719201644}. Best is trial 0 with value: 0.9736842105263158.
[I 2025-12-17 17:34:55,425] Trial 1 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 252, 'max_depth': 7, 'learning_rate': 0.16573604264062927, 'subsample': 0.7645633641677422, 'colsample_bytree': 0.8881964935889421}. Best is trial 0 with value: 0.9736842105263158.
[I 2025-12-17 17:34:55,542] Trial 2 finished with value: 0.956140350877193 and parameters: {'n_estimators': 51, 'max_depth': 7, 'learning_rate': 0.15266213488437758, 'subsample': 0.7819774601537954, 'colsample_bytree': 0.5284325753642559}. Best is trial 0 w


--- üß† Optuna Í≤∞Í≥º ---
Í∞ÄÏû• Ï¢ãÏùÄ Ï†êÏàò: 0.9825
Í∑∏ÎïåÏùò ÌååÎùºÎØ∏ÌÑ∞: {'n_estimators': 79, 'max_depth': 5, 'learning_rate': 0.1708439135326076, 'subsample': 0.576511712921509, 'colsample_bytree': 0.5095305664507473}


In [None]:
from optuna.visualization import plot_optimization_history, plot_param_importances

# 1. How the objective value evolves over the trials (an upward trend means
#    the search is finding better scores).
history_fig = plot_optimization_history(study)
history_fig.show()

# 2. Which hyperparameters influenced the score the most (importance).
importance_fig = plot_param_importances(study)
importance_fig.show()

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import optuna

# Switch to the California housing regression dataset.
# NOTE: this rebinds data, X, y and the train/test split names that the first
# cell bound to the breast-cancer data; cells above must be re-run from the
# top before they can be executed again.
data = fetch_california_housing()
X, y = data.data, data.target

# Same 80/20 split and seed as used for the classification data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

def objective(trial):
    """Optuna objective for the regression study: validation RMSE of an XGBoost regressor.

    Samples one hyperparameter configuration from ``trial``, fits an
    ``XGBRegressor`` on part of the training data and scores it on a
    validation slice carved out of ``X_train``/``y_train``. The test set is
    not used, so the tuned hyperparameters are not fitted to it.

    NOTE: this reuses the name ``objective`` and therefore replaces the
    classification objective defined in an earlier cell.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        float: root mean squared error on the validation slice (lower is better).
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse'
    }

    # Fixed-seed split so every trial is scored on the same validation rows.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    model = XGBRegressor(**params, random_state=42)
    model.fit(X_fit, y_fit)

    pred = model.predict(X_val)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version.
    rmse = float(mean_squared_error(y_val, pred) ** 0.5)

    return rmse

# Minimize the RMSE returned by the objective over 30 trials.
# A seeded TPE sampler makes the sequence of suggested hyperparameters
# repeatable across kernel restarts (the default sampler is unseeded).
study_reg = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_reg.optimize(objective, n_trials=30)

print("\n--- üè† ÏßëÍ∞í ÏòàÏ∏° ÌäúÎãù Í≤∞Í≥º ---")
print(f"Í∞ÄÏû• ÎÇÆÏùÄ Ïò§Ï∞®(RMSE): {study_reg.best_value:.4f}")
print(f"ÏµúÏ†ÅÏùò ÌååÎùºÎØ∏ÌÑ∞: {study_reg.best_params}")

[I 2025-12-18 16:04:34,098] A new study created in memory with name: no-name-ad067215-9a5b-48a0-a774-3cf393841e3a
[I 2025-12-18 16:04:39,319] Trial 0 finished with value: 0.4557594880897289 and parameters: {'n_estimators': 438, 'max_depth': 10, 'learning_rate': 0.10556759708021082, 'subsample': 0.896402464387477, 'colsample_bytree': 0.7164242695863545}. Best is trial 0 with value: 0.4557594880897289.
[I 2025-12-18 16:04:41,394] Trial 1 finished with value: 0.44314097133418945 and parameters: {'n_estimators': 344, 'max_depth': 8, 'learning_rate': 0.039409647574380154, 'subsample': 0.9694540156488256, 'colsample_bytree': 0.8587122561653693}. Best is trial 1 with value: 0.44314097133418945.
[I 2025-12-18 16:04:42,325] Trial 2 finished with value: 0.4525890581744275 and parameters: {'n_estimators': 386, 'max_depth': 5, 'learning_rate': 0.08429347873229504, 'subsample': 0.8237973727559472, 'colsample_bytree': 0.9174229032677971}. Best is trial 1 with value: 0.44314097133418945.
[I 2025-12-1


--- üè† ÏßëÍ∞í ÏòàÏ∏° ÌäúÎãù Í≤∞Í≥º ---
Í∞ÄÏû• ÎÇÆÏùÄ Ïò§Ï∞®(RMSE): 0.4375
ÏµúÏ†ÅÏùò ÌååÎùºÎØ∏ÌÑ∞: {'n_estimators': 324, 'max_depth': 8, 'learning_rate': 0.07783222604057137, 'subsample': 0.8049759264859953, 'colsample_bytree': 0.7633499328759792}


In [None]:
from optuna.visualization import plot_optimization_history, plot_param_importances

# 1. How the error shrinks over the trials (a downward trend means the
#    search is finding lower RMSE).
reg_history_fig = plot_optimization_history(study_reg)
reg_history_fig.show()

# 2. Which hyperparameters mattered most for the house-price error (importance).
reg_importance_fig = plot_param_importances(study_reg)
reg_importance_fig.show()