### Import

In [3]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-2.1.4-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.4-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ----- ---------------------------------- 16.8/124.9 MB 86.2 MB/s eta 0:00:02
   ------------- ------------------------- 41.9/124.9 MB 102.8 MB/s eta 0:00:01
   --------------------- ----------------- 67.4/124.9 MB 108.1 MB/s eta 0:00:01
   ---------------------------- ---------- 92.3/124.9 MB 110.4 MB/s eta 0:00:01
   ----------------------------------- -- 117.2/124.9 MB 111.6 MB/s eta 0:00:01
   -------------------------------------- 124.9/124.9 MB 101.7 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-2.1.4


In [30]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OrdinalEncoder

import optuna
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

### Data Load

In [31]:
train = pd.read_csv('./Data/train.csv').drop(columns=['ID'])
test = pd.read_csv('./Data/test.csv').drop(columns=['ID'])

In [32]:
X = train.drop('임신 성공 여부', axis=1)
y = train['임신 성공 여부']

### Data Pre-processing

In [33]:
categorical_columns = [
    "시술 시기 코드",
    "시술 당시 나이",
    "시술 유형",
    "특정 시술 유형",
    "배란 자극 여부",
    "배란 유도 유형",
    "단일 배아 이식 여부",
    "착상 전 유전 검사 사용 여부",
    "착상 전 유전 진단 사용 여부",
    "남성 주 불임 원인",
    "남성 부 불임 원인",
    "여성 주 불임 원인",
    "여성 부 불임 원인",
    "부부 주 불임 원인",
    "부부 부 불임 원인",
    "불명확 불임 원인",
    "불임 원인 - 난관 질환",
    "불임 원인 - 남성 요인",
    "불임 원인 - 배란 장애",
    "불임 원인 - 여성 요인",
    "불임 원인 - 자궁경부 문제",
    "불임 원인 - 자궁내막증",
    "불임 원인 - 정자 농도",
    "불임 원인 - 정자 면역학적 요인",
    "불임 원인 - 정자 운동성",
    "불임 원인 - 정자 형태",
    "배아 생성 주요 이유",
    "총 시술 횟수",
    "클리닉 내 총 시술 횟수",
    "IVF 시술 횟수",
    "DI 시술 횟수",
    "총 임신 횟수",
    "IVF 임신 횟수",
    "DI 임신 횟수",
    "총 출산 횟수",
    "IVF 출산 횟수",
    "DI 출산 횟수",
    "난자 출처",
    "정자 출처",
    "난자 기증자 나이",
    "정자 기증자 나이",
    "동결 배아 사용 여부",
    "신선 배아 사용 여부",
    "기증 배아 사용 여부",
    "대리모 여부",
    "PGD 시술 여부",
    "PGS 시술 여부"
]

In [34]:
# 카테고리형 컬럼들을 문자열로 변환
for col in categorical_columns:
    X[col] = X[col].astype(str)
    test[col] = test[col].astype(str)

In [35]:
ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)

X_train_encoded = X.copy()
X_train_encoded[categorical_columns] = ordinal_encoder.fit_transform(X[categorical_columns])

X_test_encoded = test.copy()
X_test_encoded[categorical_columns] = ordinal_encoder.transform(test[categorical_columns])

In [9]:
numeric_columns = [
    "임신 시도 또는 마지막 임신 경과 연수",
    "총 생성 배아 수",
    "미세주입된 난자 수",
    "미세주입에서 생성된 배아 수",
    "이식된 배아 수",
    "미세주입 배아 이식 수",
    "저장된 배아 수",
    "미세주입 후 저장된 배아 수",
    "해동된 배아 수",
    "해동 난자 수",
    "수집된 신선 난자 수",
    "저장된 신선 난자 수",
    "혼합된 난자 수",
    "파트너 정자와 혼합된 난자 수",
    "기증자 정자와 혼합된 난자 수",
    "난자 채취 경과일",
    "난자 해동 경과일",
    "난자 혼합 경과일",
    "배아 이식 경과일",
    "배아 해동 경과일"
]

In [36]:
numeric_columns = [col for col in numeric_columns if col in X.columns]

X_train_encoded[numeric_columns] = X_train_encoded[numeric_columns].fillna(0)
X_test_encoded[numeric_columns] = X_test_encoded[numeric_columns].fillna(0)

In [64]:
from sklearn.feature_selection import SelectKBest, f_classif
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import VarianceThreshold, SelectKBest, f_classif

# 1️⃣ 상수 Feature 제거
var_thresh = VarianceThreshold(threshold=0.0)
X_train_var_filtered = var_thresh.fit_transform(X_train_encoded)

# 2️⃣ Feature Selection 적용
selector = SelectKBest(f_classif, k=20)
X_train_selected = selector.fit_transform(X_train_var_filtered, y)

# 2️⃣ SMOTE 적용 (Feature Selection 후)
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_selected, y)

In [65]:
print("X_train_encoded:", X_train_encoded.shape)
print("y:", y.shape)
print("X_train_selected:", X_train_selected.shape)
print("X_train_resampled:", X_train_resampled.shape)
print("y_resampled:", y_resampled.shape)

X_train_encoded: (256351, 67)
y: (256351,)
X_train_selected: (256351, 20)
X_train_resampled: (380246, 67)
y_resampled: (380246,)


In [66]:
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

### Train

XGBoost

In [67]:
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "lambda": trial.suggest_float("lambda", 1, 10),
        "alpha": trial.suggest_float("alpha", 0, 10),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
    }

    model = xgb.XGBClassifier(
        **params,
        objective="binary:logistic",
        eval_metric="auc",
        use_label_encoder=False,
        verbosity=0,  # 경고 메시지 숨김
        random_state=42,
        n_jobs=-1,
        scale_pos_weight=len(y_train[y_train == 0]) / len(y_train[y_train == 1])
    )

    model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)
    y_pred = model.predict_proba(X_valid)[:, 1]
    auc = roc_auc_score(y_valid, y_pred)

    return auc


In [None]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="xgboost")

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=150, show_progress_bar=True)

[I 2025-02-16 01:27:29,706] A new study created in memory with name: no-name-c1c343b6-ea03-4f00-abd1-0f0252ff3e71
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),
  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:27:44,349] Trial 0 finished with value: 0.9063829345118375 and parameters: {'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.1678756889016412, 'subsample': 0.8071020557907851, 'colsample_bytree': 0.7332391139768633, 'gamma': 1.9926006552330549, 'lambda': 6.761133455136932, 'alpha': 0.23872499618251442, 'min_child_weight': 10}. Best is trial 0 with value: 0.9063829345118375.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:27:52,681] Trial 1 finished with value: 0.906917425601085 and parameters: {'n_estimators': 200, 'max_depth': 14, 'learning_rate': 0.06234382367643746, 'subsample': 0.8500205740199213, 'colsample_bytree': 0.682456051313469, 'gamma': 2.109104712366281, 'lambda': 5.363802198821464, 'alpha': 4.936502259684005, 'min_child_weight': 3}. Best is trial 1 with value: 0.906917425601085.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:28:18,498] Trial 2 finished with value: 0.9060362377946962 and parameters: {'n_estimators': 700, 'max_depth': 12, 'learning_rate': 0.01561872024441029, 'subsample': 0.9089534862786809, 'colsample_bytree': 0.844162764560535, 'gamma': 3.5003956832747463, 'lambda': 9.294543561916308, 'alpha': 4.553704074287143, 'min_child_weight': 2}. Best is trial 1 with value: 0.906917425601085.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:28:37,705] Trial 3 finished with value: 0.9073744933196914 and parameters: {'n_estimators': 600, 'max_depth': 15, 'learning_rate': 0.05950806279671268, 'subsample': 0.6278610544641121, 'colsample_bytree': 0.9327506654683642, 'gamma': 2.8188022365780907, 'lambda': 6.416034884566681, 'alpha': 0.6064001650747364, 'min_child_weight': 2}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:28:51,388] Trial 4 finished with value: 0.9051130975892621 and parameters: {'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.08606667607912, 'subsample': 0.6919049046993743, 'colsample_bytree': 0.5049238302736159, 'gamma': 3.5247492351101117, 'lambda': 6.748936336641793, 'alpha': 0.35512368601196553, 'min_child_weight': 8}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:29:10,716] Trial 5 finished with value: 0.9059214266540621 and parameters: {'n_estimators': 600, 'max_depth': 8, 'learning_rate': 0.02719916439194592, 'subsample': 0.6833300495736243, 'colsample_bytree': 0.8224330455136026, 'gamma': 4.009549829892841, 'lambda': 9.301001632875387, 'alpha': 3.1453572028808163, 'min_child_weight': 4}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:29:22,330] Trial 6 finished with value: 0.9058956970158305 and parameters: {'n_estimators': 400, 'max_depth': 9, 'learning_rate': 0.07622808698966169, 'subsample': 0.9445162570981969, 'colsample_bytree': 0.5150748922459792, 'gamma': 4.347864816248884, 'lambda': 1.1619119670812665, 'alpha': 2.7109580728292437, 'min_child_weight': 3}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:29:44,917] Trial 7 finished with value: 0.9072088335946324 and parameters: {'n_estimators': 500, 'max_depth': 15, 'learning_rate': 0.021412508382068065, 'subsample': 0.5359877017387136, 'colsample_bytree': 0.8520020047037105, 'gamma': 1.245433811225637, 'lambda': 3.3903489923716026, 'alpha': 3.817009976355905, 'min_child_weight': 9}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:30:08,525] Trial 8 finished with value: 0.8967879652863417 and parameters: {'n_estimators': 800, 'max_depth': 4, 'learning_rate': 0.011610208689979617, 'subsample': 0.5107576367130495, 'colsample_bytree': 0.5707640868105883, 'gamma': 3.470278987761507, 'lambda': 3.256439991770533, 'alpha': 0.854425729962266, 'min_child_weight': 8}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:30:24,598] Trial 9 finished with value: 0.8891095250754181 and parameters: {'n_estimators': 550, 'max_depth': 4, 'learning_rate': 0.010461135555518884, 'subsample': 0.6028024230787685, 'colsample_bytree': 0.770086184928056, 'gamma': 3.427687704676252, 'lambda': 9.087375156817101, 'alpha': 7.0460634945269875, 'min_child_weight': 5}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:30:59,506] Trial 10 finished with value: 0.9011298549649633 and parameters: {'n_estimators': 950, 'max_depth': 12, 'learning_rate': 0.2902561846946892, 'subsample': 0.6431284944522626, 'colsample_bytree': 0.9875922246578014, 'gamma': 0.9907428081102299, 'lambda': 6.877984771500124, 'alpha': 9.112381944935859, 'min_child_weight': 1}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:31:15,378] Trial 11 finished with value: 0.9070390442635018 and parameters: {'n_estimators': 300, 'max_depth': 15, 'learning_rate': 0.03061304535738472, 'subsample': 0.5126198661119236, 'colsample_bytree': 0.9623426843110858, 'gamma': 0.12229018746864262, 'lambda': 4.139228475068046, 'alpha': 2.3162969528616104, 'min_child_weight': 7}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:31:44,238] Trial 12 finished with value: 0.9069237054240848 and parameters: {'n_estimators': 750, 'max_depth': 12, 'learning_rate': 0.03478601898413875, 'subsample': 0.5909830142012604, 'colsample_bytree': 0.9051338721234585, 'gamma': 1.1881092010081997, 'lambda': 2.9993898964799084, 'alpha': 7.233258000479894, 'min_child_weight': 6}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:31:49,782] Trial 13 finished with value: 0.8967967923106749 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.021514409633815842, 'subsample': 0.7530263901447486, 'colsample_bytree': 0.9042080052761232, 'gamma': 2.5744894612724076, 'lambda': 4.953415086231141, 'alpha': 1.9166252556197296, 'min_child_weight': 10}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:32:04,866] Trial 14 finished with value: 0.9071840673697751 and parameters: {'n_estimators': 350, 'max_depth': 13, 'learning_rate': 0.04400860987115598, 'subsample': 0.5700463165244136, 'colsample_bytree': 0.8922804183907064, 'gamma': 1.3552874252553466, 'lambda': 1.4447655277134128, 'alpha': 4.0686502001054246, 'min_child_weight': 1}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:32:32,687] Trial 15 finished with value: 0.9067169982722706 and parameters: {'n_estimators': 1000, 'max_depth': 7, 'learning_rate': 0.12646574064769436, 'subsample': 0.5535616130456436, 'colsample_bytree': 0.6515386447817648, 'gamma': 2.679685829390452, 'lambda': 2.5898503390245327, 'alpha': 6.120619431382753, 'min_child_weight': 9}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:33:05,325] Trial 16 finished with value: 0.9068038175168505 and parameters: {'n_estimators': 850, 'max_depth': 10, 'learning_rate': 0.018925983700714, 'subsample': 0.6654070623677579, 'colsample_bytree': 0.8246563580079468, 'gamma': 0.1408634512850071, 'lambda': 8.017930903044967, 'alpha': 9.627815914130277, 'min_child_weight': 6}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:33:27,714] Trial 17 finished with value: 0.9072134303835715 and parameters: {'n_estimators': 650, 'max_depth': 11, 'learning_rate': 0.04697313517466058, 'subsample': 0.7413229677333036, 'colsample_bytree': 0.9325046826546517, 'gamma': 1.7529924468628448, 'lambda': 4.2185019022479775, 'alpha': 1.5111242491430565, 'min_child_weight': 5}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:33:49,806] Trial 18 finished with value: 0.9070833311936635 and parameters: {'n_estimators': 650, 'max_depth': 10, 'learning_rate': 0.04739992942663502, 'subsample': 0.7285790216496795, 'colsample_bytree': 0.9544253287700332, 'gamma': 1.818550514292088, 'lambda': 4.636967990614426, 'alpha': 1.31040069484472, 'min_child_weight': 4}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:34:13,190] Trial 19 finished with value: 0.9066446710333222 and parameters: {'n_estimators': 900, 'max_depth': 6, 'learning_rate': 0.12666656445718977, 'subsample': 0.7742982209192169, 'colsample_bytree': 0.7668480738263157, 'gamma': 2.9975978575926496, 'lambda': 5.968910121646702, 'alpha': 1.4687288564048795, 'min_child_weight': 5}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:34:31,686] Trial 20 finished with value: 0.9062281901289033 and parameters: {'n_estimators': 700, 'max_depth': 11, 'learning_rate': 0.10054297254267552, 'subsample': 0.8312530260659694, 'colsample_bytree': 0.9962201477724889, 'gamma': 4.751280567071735, 'lambda': 8.232554758742284, 'alpha': 0.0874174513854169, 'min_child_weight': 2}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:34:52,400] Trial 21 finished with value: 0.9056577152389017 and parameters: {'n_estimators': 450, 'max_depth': 14, 'learning_rate': 0.057641071287242315, 'subsample': 0.6247525869947386, 'colsample_bytree': 0.8590774165047559, 'gamma': 0.670354976705756, 'lambda': 3.841138652094919, 'alpha': 3.285368718417667, 'min_child_weight': 7}. Best is trial 3 with value: 0.9073744933196914.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:35:14,284] Trial 22 finished with value: 0.9075617710587821 and parameters: {'n_estimators': 600, 'max_depth': 14, 'learning_rate': 0.03824915804429822, 'subsample': 0.7189775431541543, 'colsample_bytree': 0.9335818956329129, 'gamma': 1.727875552937482, 'lambda': 2.157159689979591, 'alpha': 3.8074944992937843, 'min_child_weight': 3}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:35:39,155] Trial 23 finished with value: 0.9073961116795285 and parameters: {'n_estimators': 650, 'max_depth': 13, 'learning_rate': 0.03588083648393655, 'subsample': 0.712754845809239, 'colsample_bytree': 0.9350438589487959, 'gamma': 1.603736186552121, 'lambda': 2.2258254291355444, 'alpha': 5.738233181602995, 'min_child_weight': 3}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:36:00,782] Trial 24 finished with value: 0.907241191973342 and parameters: {'n_estimators': 600, 'max_depth': 13, 'learning_rate': 0.03960876371032774, 'subsample': 0.6911635967001108, 'colsample_bytree': 0.927522204538177, 'gamma': 2.2862929198666277, 'lambda': 2.0140484684963367, 'alpha': 5.966947352992751, 'min_child_weight': 3}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:36:26,679] Trial 25 finished with value: 0.9069570586844445 and parameters: {'n_estimators': 800, 'max_depth': 14, 'learning_rate': 0.02633261953893248, 'subsample': 0.7151024030950179, 'colsample_bytree': 0.7931271409635057, 'gamma': 3.01603544240029, 'lambda': 2.3979621833361167, 'alpha': 5.557561651711615, 'min_child_weight': 2}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:36:48,060] Trial 26 finished with value: 0.9072874167960194 and parameters: {'n_estimators': 600, 'max_depth': 13, 'learning_rate': 0.03644606669582069, 'subsample': 0.7825188958952867, 'colsample_bytree': 0.8806722514941625, 'gamma': 1.568244203238824, 'lambda': 6.070817224525774, 'alpha': 7.979149775038265, 'min_child_weight': 4}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:37:21,668] Trial 27 finished with value: 0.9047632547361268 and parameters: {'n_estimators': 750, 'max_depth': 14, 'learning_rate': 0.07114807048147173, 'subsample': 0.6421540292472119, 'colsample_bytree': 0.9602249391622473, 'gamma': 0.7363912078833375, 'lambda': 1.9569794730383523, 'alpha': 6.451111491905538, 'min_child_weight': 1}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:37:31,123] Trial 28 finished with value: 0.9066142567716229 and parameters: {'n_estimators': 250, 'max_depth': 15, 'learning_rate': 0.05507177535515401, 'subsample': 0.8890162499374901, 'colsample_bytree': 0.9288312457174275, 'gamma': 2.267275650284302, 'lambda': 8.032495101785157, 'alpha': 8.215558956843559, 'min_child_weight': 3}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:37:42,937] Trial 29 finished with value: 0.9054187724692353 and parameters: {'n_estimators': 450, 'max_depth': 13, 'learning_rate': 0.23857072206766788, 'subsample': 0.985979531404356, 'colsample_bytree': 0.7087239543512353, 'gamma': 2.9290355694940455, 'lambda': 7.153948972591456, 'alpha': 5.423023020011854, 'min_child_weight': 2}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:38:03,855] Trial 30 finished with value: 0.9063478660575308 and parameters: {'n_estimators': 550, 'max_depth': 11, 'learning_rate': 0.016152101734837377, 'subsample': 0.7942141101865807, 'colsample_bytree': 0.6107800895848914, 'gamma': 2.1887112432217304, 'lambda': 1.6578793129620717, 'alpha': 4.354735668047305, 'min_child_weight': 4}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:38:25,028] Trial 31 finished with value: 0.9072312704754519 and parameters: {'n_estimators': 600, 'max_depth': 13, 'learning_rate': 0.03593596733090634, 'subsample': 0.7845200161244034, 'colsample_bytree': 0.8722227084803451, 'gamma': 1.4879205340762396, 'lambda': 6.094352901644149, 'alpha': 8.083142688971435, 'min_child_weight': 4}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:38:47,911] Trial 32 finished with value: 0.9072294660637552 and parameters: {'n_estimators': 650, 'max_depth': 14, 'learning_rate': 0.029874242440248357, 'subsample': 0.8301289109250642, 'colsample_bytree': 0.8974034595920937, 'gamma': 1.684767321947861, 'lambda': 5.721456290269444, 'alpha': 7.840972938224949, 'min_child_weight': 3}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:39:15,304] Trial 33 finished with value: 0.9058524945308741 and parameters: {'n_estimators': 700, 'max_depth': 12, 'learning_rate': 0.06494945193564551, 'subsample': 0.7594153525465943, 'colsample_bytree': 0.9695822675680796, 'gamma': 0.8746659698090411, 'lambda': 6.401951996271111, 'alpha': 4.864064838092833, 'min_child_weight': 2}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:39:30,644] Trial 34 finished with value: 0.9071007649643973 and parameters: {'n_estimators': 450, 'max_depth': 14, 'learning_rate': 0.0504238131140042, 'subsample': 0.7091989869428182, 'colsample_bytree': 0.8110855979171263, 'gamma': 1.9987501772181657, 'lambda': 5.174360405994346, 'alpha': 9.112435120593851, 'min_child_weight': 3}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:39:45,681] Trial 35 finished with value: 0.9070568497748592 and parameters: {'n_estimators': 500, 'max_depth': 13, 'learning_rate': 0.09027760853263278, 'subsample': 0.88352863686079, 'colsample_bytree': 0.9335832012131897, 'gamma': 1.5417001208074186, 'lambda': 7.390842458520579, 'alpha': 6.870312301274485, 'min_child_weight': 4}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:40:07,530] Trial 36 finished with value: 0.9075047443180959 and parameters: {'n_estimators': 550, 'max_depth': 15, 'learning_rate': 0.024933290021700655, 'subsample': 0.8185914410530063, 'colsample_bytree': 0.8771555723863844, 'gamma': 1.8811850038443612, 'lambda': 5.399986188279734, 'alpha': 3.1914855389452166, 'min_child_weight': 3}. Best is trial 22 with value: 0.9075617710587821.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:40:29,721] Trial 37 finished with value: 0.9078832869305938 and parameters: {'n_estimators': 550, 'max_depth': 15, 'learning_rate': 0.025641964168839477, 'subsample': 0.6672278260515603, 'colsample_bytree': 0.836610959576052, 'gamma': 1.9499820773329337, 'lambda': 1.0264669071059975, 'alpha': 3.2046203349083613, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:40:52,583] Trial 38 finished with value: 0.9077055579113537 and parameters: {'n_estimators': 400, 'max_depth': 15, 'learning_rate': 0.022385744588208724, 'subsample': 0.8159637979333167, 'colsample_bytree': 0.8326910282122956, 'gamma': 0.49044943715153333, 'lambda': 1.0555824237601135, 'alpha': 3.4565784718166936, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:41:12,291] Trial 39 finished with value: 0.9075863280714744 and parameters: {'n_estimators': 350, 'max_depth': 15, 'learning_rate': 0.023646700728662387, 'subsample': 0.8573620456965557, 'colsample_bytree': 0.7222271351706918, 'gamma': 0.520024121034846, 'lambda': 1.1504996892670145, 'alpha': 3.4629390928393917, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:41:33,324] Trial 40 finished with value: 0.9068757322101579 and parameters: {'n_estimators': 350, 'max_depth': 15, 'learning_rate': 0.015904827296468176, 'subsample': 0.9324411964274768, 'colsample_bytree': 0.7250163503667162, 'gamma': 0.4471436462550855, 'lambda': 1.1470474284415777, 'alpha': 3.665751500458233, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:41:46,391] Trial 41 finished with value: 0.9060758632703405 and parameters: {'n_estimators': 200, 'max_depth': 15, 'learning_rate': 0.02325581198001718, 'subsample': 0.8595940866827726, 'colsample_bytree': 0.8407569088702005, 'gamma': 0.45402631434177054, 'lambda': 1.5458700559103788, 'alpha': 2.90836507737916, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:42:08,751] Trial 42 finished with value: 0.9069628488472368 and parameters: {'n_estimators': 400, 'max_depth': 15, 'learning_rate': 0.013638589516890181, 'subsample': 0.8223155006003395, 'colsample_bytree': 0.7928022855696061, 'gamma': 1.1818366679018957, 'lambda': 1.028848490057434, 'alpha': 2.429032286135098, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:42:26,334] Trial 43 finished with value: 0.9068393967254837 and parameters: {'n_estimators': 350, 'max_depth': 14, 'learning_rate': 0.018668760214453423, 'subsample': 0.8581411658712534, 'colsample_bytree': 0.6843174891608048, 'gamma': 1.0150836086162416, 'lambda': 2.8033089869427266, 'alpha': 3.557441413821383, 'min_child_weight': 2}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:42:54,678] Trial 44 finished with value: 0.907604948645762 and parameters: {'n_estimators': 500, 'max_depth': 15, 'learning_rate': 0.02623601403224335, 'subsample': 0.8115991844684033, 'colsample_bytree': 0.7455686906336432, 'gamma': 0.4557159897715397, 'lambda': 1.9083230783317222, 'alpha': 4.161227842890416, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:43:15,985] Trial 45 finished with value: 0.9076158653711075 and parameters: {'n_estimators': 400, 'max_depth': 14, 'learning_rate': 0.030147261809542334, 'subsample': 0.6649766916771177, 'colsample_bytree': 0.7382678156034345, 'gamma': 0.3730881492723912, 'lambda': 1.78693266582797, 'alpha': 4.578719205105856, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:43:36,722] Trial 46 finished with value: 0.9049458281408517 and parameters: {'n_estimators': 300, 'max_depth': 15, 'learning_rate': 0.01336229660828388, 'subsample': 0.6614201379737271, 'colsample_bytree': 0.750985774396286, 'gamma': 0.0007827147989360461, 'lambda': 1.6566988888642773, 'alpha': 4.566478334029794, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:43:57,806] Trial 47 finished with value: 0.9076236314649908 and parameters: {'n_estimators': 400, 'max_depth': 14, 'learning_rate': 0.0301381387306315, 'subsample': 0.9097590146772079, 'colsample_bytree': 0.6858465399593726, 'gamma': 0.3583500910457108, 'lambda': 1.0063228235066983, 'alpha': 4.438428431569027, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:44:17,979] Trial 48 finished with value: 0.9074920484248356 and parameters: {'n_estimators': 400, 'max_depth': 14, 'learning_rate': 0.03008847878912973, 'subsample': 0.9867469847217418, 'colsample_bytree': 0.6506395945720451, 'gamma': 0.3142975765175824, 'lambda': 3.0698016654622142, 'alpha': 4.45317950385058, 'min_child_weight': 2}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:44:42,867] Trial 49 finished with value: 0.9069963079239972 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.018355313422533017, 'subsample': 0.9603350786292203, 'colsample_bytree': 0.701482457677449, 'gamma': 0.24357465399651299, 'lambda': 1.7598094106767739, 'alpha': 5.130414092598566, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:44:53,492] Trial 50 finished with value: 0.8988569511822432 and parameters: {'n_estimators': 300, 'max_depth': 6, 'learning_rate': 0.020679294693617133, 'subsample': 0.6139505881617581, 'colsample_bytree': 0.6536178276795458, 'gamma': 0.6763911003535439, 'lambda': 3.4223494678139526, 'alpha': 4.042378396577376, 'min_child_weight': 2}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:45:16,010] Trial 51 finished with value: 0.9077613753711462 and parameters: {'n_estimators': 400, 'max_depth': 15, 'learning_rate': 0.026764388841650538, 'subsample': 0.8841320737000552, 'colsample_bytree': 0.7449062186647266, 'gamma': 0.46290857429863097, 'lambda': 1.2903587389073115, 'alpha': 4.838648132156235, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:45:37,535] Trial 52 finished with value: 0.9076182880825575 and parameters: {'n_estimators': 450, 'max_depth': 14, 'learning_rate': 0.028977147471268852, 'subsample': 0.913709096284581, 'colsample_bytree': 0.7486034659198505, 'gamma': 0.9460163090673461, 'lambda': 1.4483136506008623, 'alpha': 5.023986644775724, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:45:56,373] Trial 53 finished with value: 0.9076123346996963 and parameters: {'n_estimators': 400, 'max_depth': 14, 'learning_rate': 0.03119623586499639, 'subsample': 0.9169080198750968, 'colsample_bytree': 0.78063611138208, 'gamma': 0.9412381181250444, 'lambda': 1.3860108281865362, 'alpha': 4.940246426292781, 'min_child_weight': 2}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:46:13,461] Trial 54 finished with value: 0.9064086437475605 and parameters: {'n_estimators': 450, 'max_depth': 8, 'learning_rate': 0.03062155560852725, 'subsample': 0.8895805784838193, 'colsample_bytree': 0.7528972739834229, 'gamma': 0.05027588049904652, 'lambda': 2.507135188424864, 'alpha': 5.2018335492016154, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:46:28,072] Trial 55 finished with value: 0.9074916704597178 and parameters: {'n_estimators': 250, 'max_depth': 14, 'learning_rate': 0.042465356434434526, 'subsample': 0.9074456759474516, 'colsample_bytree': 0.6750344386182026, 'gamma': 0.2708664159670173, 'lambda': 1.022367814129896, 'alpha': 2.7098205498975165, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:46:41,070] Trial 56 finished with value: 0.9053970071421749 and parameters: {'n_estimators': 250, 'max_depth': 13, 'learning_rate': 0.021072886356780857, 'subsample': 0.9528933076795515, 'colsample_bytree': 0.8189970513399218, 'gamma': 0.8098119297119335, 'lambda': 1.4605384524260177, 'alpha': 4.708969805885068, 'min_child_weight': 2}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:47:02,139] Trial 57 finished with value: 0.907767640324521 and parameters: {'n_estimators': 450, 'max_depth': 15, 'learning_rate': 0.028512592886531202, 'subsample': 0.9324087597645658, 'colsample_bytree': 0.6268607987224811, 'gamma': 1.0946620656141484, 'lambda': 9.863605464872753, 'alpha': 2.2379124858241157, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:47:29,756] Trial 58 finished with value: 0.9076949627848732 and parameters: {'n_estimators': 500, 'max_depth': 15, 'learning_rate': 0.013710582426058692, 'subsample': 0.9341221963825435, 'colsample_bytree': 0.5910030235624069, 'gamma': 1.2083699927943852, 'lambda': 1.4347236860404606, 'alpha': 2.1396584110263066, 'min_child_weight': 1}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:47:56,379] Trial 59 finished with value: 0.9070113929856003 and parameters: {'n_estimators': 500, 'max_depth': 15, 'learning_rate': 0.011737043565775956, 'subsample': 0.870100110239329, 'colsample_bytree': 0.5538787499144094, 'gamma': 1.3464455920141785, 'lambda': 8.732895758469407, 'alpha': 0.816983690040832, 'min_child_weight': 2}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:48:13,063] Trial 60 finished with value: 0.8811212803784492 and parameters: {'n_estimators': 550, 'max_depth': 3, 'learning_rate': 0.010213528736253082, 'subsample': 0.971016371552768, 'colsample_bytree': 0.5909432523084958, 'gamma': 1.1623764514169077, 'lambda': 9.844661056746265, 'alpha': 1.86757322722992, 'min_child_weight': 2}. Best is trial 37 with value: 0.9078832869305938.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:48:39,936] Trial 61 finished with value: 0.9081425139435153 and parameters: {'n_estimators': 450, 'max_depth': 15, 'learning_rate': 0.018004336647421862, 'subsample': 0.9285938694513046, 'colsample_bytree': 0.6200510411772163, 'gamma': 0.5833926188310681, 'lambda': 1.3398600024441303, 'alpha': 2.1834951285558426, 'min_child_weight': 1}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:49:08,822] Trial 62 finished with value: 0.907466684994344 and parameters: {'n_estimators': 450, 'max_depth': 15, 'learning_rate': 0.012975715553318182, 'subsample': 0.9277448370125099, 'colsample_bytree': 0.6231598934678505, 'gamma': 0.5532979272136427, 'lambda': 2.1542812706411834, 'alpha': 2.0956609026491044, 'min_child_weight': 1}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:49:27,124] Trial 63 finished with value: 0.9072499536404861 and parameters: {'n_estimators': 300, 'max_depth': 15, 'learning_rate': 0.017141760031630246, 'subsample': 0.9348516890475064, 'colsample_bytree': 0.5369125006462936, 'gamma': 0.6542510040466762, 'lambda': 1.3184376823029311, 'alpha': 2.5800809277149943, 'min_child_weight': 1}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:49:47,675] Trial 64 finished with value: 0.9078727298426889 and parameters: {'n_estimators': 400, 'max_depth': 15, 'learning_rate': 0.02214386544142801, 'subsample': 0.9018448811734681, 'colsample_bytree': 0.5743015618894232, 'gamma': 1.0347019113919713, 'lambda': 2.683443201210218, 'alpha': 2.98944847911966, 'min_child_weight': 2}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:50:07,252] Trial 65 finished with value: 0.9079658078161491 and parameters: {'n_estimators': 350, 'max_depth': 15, 'learning_rate': 0.02239822344409192, 'subsample': 0.9684955199664084, 'colsample_bytree': 0.5885989140614989, 'gamma': 1.0514699465240933, 'lambda': 2.2468225890600233, 'alpha': 1.2452032583760462, 'min_child_weight': 2}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:50:22,978] Trial 66 finished with value: 0.9071433757766028 and parameters: {'n_estimators': 350, 'max_depth': 15, 'learning_rate': 0.022465775824142217, 'subsample': 0.9660782746104566, 'colsample_bytree': 0.5200586293985413, 'gamma': 3.2249359896414047, 'lambda': 2.767447417713573, 'alpha': 1.166518826683834, 'min_child_weight': 2}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:50:40,895] Trial 67 finished with value: 0.9067610870560346 and parameters: {'n_estimators': 350, 'max_depth': 13, 'learning_rate': 0.018619519699662978, 'subsample': 0.9956166176222325, 'colsample_bytree': 0.6157717887178098, 'gamma': 1.0312459045713207, 'lambda': 2.4370277167584304, 'alpha': 0.3384443649700817, 'min_child_weight': 2}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:50:47,175] Trial 68 finished with value: 0.9017646278422489 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.020201192150350867, 'subsample': 0.8413100747273314, 'colsample_bytree': 0.5793956469917043, 'gamma': 1.3629809535163102, 'lambda': 3.620230576661335, 'alpha': 1.6406108545600695, 'min_child_weight': 7}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:51:05,790] Trial 69 finished with value: 0.9060048881298464 and parameters: {'n_estimators': 400, 'max_depth': 14, 'learning_rate': 0.015341017885879603, 'subsample': 0.9477199213499977, 'colsample_bytree': 0.6344635205450048, 'gamma': 2.417068286529095, 'lambda': 2.1645517042562523, 'alpha': 3.023632808248191, 'min_child_weight': 10}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:51:27,511] Trial 70 finished with value: 0.9072984683854051 and parameters: {'n_estimators': 550, 'max_depth': 10, 'learning_rate': 0.026795455288739185, 'subsample': 0.8985888942051381, 'colsample_bytree': 0.5560809227050761, 'gamma': 0.8042932996810261, 'lambda': 4.260918835642544, 'alpha': 0.9925077623396068, 'min_child_weight': 2}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:51:53,285] Trial 71 finished with value: 0.9075487366222004 and parameters: {'n_estimators': 450, 'max_depth': 15, 'learning_rate': 0.014208476940072198, 'subsample': 0.974096287051151, 'colsample_bytree': 0.5901432501047125, 'gamma': 1.138989855677545, 'lambda': 1.2688625669742923, 'alpha': 2.1503443997793408, 'min_child_weight': 1}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:52:16,087] Trial 72 finished with value: 0.9078165313055315 and parameters: {'n_estimators': 500, 'max_depth': 15, 'learning_rate': 0.023975287327115343, 'subsample': 0.8780764350639634, 'colsample_bytree': 0.6004933262498307, 'gamma': 1.377150103387783, 'lambda': 1.6054384803001056, 'alpha': 1.799277743046658, 'min_child_weight': 9}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:52:35,573] Trial 73 finished with value: 0.9078785362583268 and parameters: {'n_estimators': 450, 'max_depth': 15, 'learning_rate': 0.03280930922669752, 'subsample': 0.8443283298705422, 'colsample_bytree': 0.5626178358932934, 'gamma': 1.4059541808496685, 'lambda': 1.9143546161135205, 'alpha': 1.5931829508615092, 'min_child_weight': 8}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:52:54,534] Trial 74 finished with value: 0.907933071472322 and parameters: {'n_estimators': 450, 'max_depth': 14, 'learning_rate': 0.03402793954900171, 'subsample': 0.8790611271947292, 'colsample_bytree': 0.5648943416168108, 'gamma': 1.398869479230691, 'lambda': 1.9645698914787983, 'alpha': 1.784239475867564, 'min_child_weight': 9}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:53:15,244] Trial 75 finished with value: 0.9075988718104331 and parameters: {'n_estimators': 550, 'max_depth': 14, 'learning_rate': 0.03327285512209927, 'subsample': 0.8430138684658478, 'colsample_bytree': 0.5518796651263849, 'gamma': 1.9847283280724264, 'lambda': 2.9037547210335264, 'alpha': 1.772690786240814, 'min_child_weight': 9}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:53:35,532] Trial 76 finished with value: 0.9077253154932414 and parameters: {'n_estimators': 500, 'max_depth': 14, 'learning_rate': 0.03402112071526843, 'subsample': 0.877857667579165, 'colsample_bytree': 0.570637434587585, 'gamma': 1.3716642873163551, 'lambda': 2.2937809609352335, 'alpha': 1.2916639343937655, 'min_child_weight': 8}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:53:54,574] Trial 77 finished with value: 0.9076945796326771 and parameters: {'n_estimators': 500, 'max_depth': 15, 'learning_rate': 0.039926660237515184, 'subsample': 0.9007523890379816, 'colsample_bytree': 0.6041845528321053, 'gamma': 1.6628238053076019, 'lambda': 2.63807742825603, 'alpha': 0.4922787038487004, 'min_child_weight': 9}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:54:14,164] Trial 78 finished with value: 0.9078367463877401 and parameters: {'n_estimators': 450, 'max_depth': 14, 'learning_rate': 0.023470442442604694, 'subsample': 0.920657161742724, 'colsample_bytree': 0.5085962612468863, 'gamma': 1.8683845967144916, 'lambda': 1.9788384765971252, 'alpha': 0.7012297920814372, 'min_child_weight': 8}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:54:36,512] Trial 79 finished with value: 0.9078878781866491 and parameters: {'n_estimators': 550, 'max_depth': 13, 'learning_rate': 0.023979680488488142, 'subsample': 0.8715295221945276, 'colsample_bytree': 0.505676011727185, 'gamma': 1.8486324096669382, 'lambda': 1.94136318615627, 'alpha': 0.0020000390340970675, 'min_child_weight': 8}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:54:56,048] Trial 80 finished with value: 0.9068312969295522 and parameters: {'n_estimators': 450, 'max_depth': 12, 'learning_rate': 0.017188749687636132, 'subsample': 0.8009598493694842, 'colsample_bytree': 0.5012071469377248, 'gamma': 1.8558578676926163, 'lambda': 3.148258432262424, 'alpha': 1.0708631654003642, 'min_child_weight': 8}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:55:19,220] Trial 81 finished with value: 0.907573325448974 and parameters: {'n_estimators': 600, 'max_depth': 13, 'learning_rate': 0.02406190586942984, 'subsample': 0.9205922882399539, 'colsample_bytree': 0.5255375046378642, 'gamma': 2.064300801213121, 'lambda': 2.009259654518404, 'alpha': 0.034521653860544434, 'min_child_weight': 8}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:55:41,353] Trial 82 finished with value: 0.9079598824435118 and parameters: {'n_estimators': 500, 'max_depth': 14, 'learning_rate': 0.024006755026162018, 'subsample': 0.8693724130747571, 'colsample_bytree': 0.5371387564708834, 'gamma': 1.4574300687209316, 'lambda': 1.8371659180385598, 'alpha': 0.5332606196741889, 'min_child_weight': 9}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:56:08,101] Trial 83 finished with value: 0.908068438659123 and parameters: {'n_estimators': 600, 'max_depth': 14, 'learning_rate': 0.019264360953722706, 'subsample': 0.871843136814963, 'colsample_bytree': 0.5354078470443434, 'gamma': 1.4777825896320076, 'lambda': 1.8889179741227313, 'alpha': 0.5869793753498997, 'min_child_weight': 6}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:56:33,874] Trial 84 finished with value: 0.9078433668288926 and parameters: {'n_estimators': 600, 'max_depth': 13, 'learning_rate': 0.019763525015351186, 'subsample': 0.8648472535394778, 'colsample_bytree': 0.5393246146851121, 'gamma': 1.477366925663297, 'lambda': 1.8325363047487493, 'alpha': 0.2750959755665673, 'min_child_weight': 6}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:56:58,674] Trial 85 finished with value: 0.9067841387785553 and parameters: {'n_estimators': 650, 'max_depth': 14, 'learning_rate': 0.02140457298491723, 'subsample': 0.8461450333404811, 'colsample_bytree': 0.5694823307074572, 'gamma': 3.8864750449199525, 'lambda': 2.3027098469980403, 'alpha': 1.4222669852553216, 'min_child_weight': 7}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:57:25,216] Trial 86 finished with value: 0.9075598760461149 and parameters: {'n_estimators': 600, 'max_depth': 13, 'learning_rate': 0.016968207723494513, 'subsample': 0.8306913480321939, 'colsample_bytree': 0.5393508195068448, 'gamma': 1.5848645117776399, 'lambda': 2.6147706662072006, 'alpha': 0.47521865273142, 'min_child_weight': 10}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:57:48,941] Trial 87 finished with value: 0.907697301119847 and parameters: {'n_estimators': 550, 'max_depth': 14, 'learning_rate': 0.025241925387556836, 'subsample': 0.5767218369593006, 'colsample_bytree': 0.5301762934912522, 'gamma': 1.729767822400544, 'lambda': 1.7384392772555932, 'alpha': 0.7182307254756743, 'min_child_weight': 9}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:58:15,284] Trial 88 finished with value: 0.9075869812975563 and parameters: {'n_estimators': 550, 'max_depth': 14, 'learning_rate': 0.01509229309212609, 'subsample': 0.8981112311931956, 'colsample_bytree': 0.5586837172376735, 'gamma': 1.266760010098956, 'lambda': 2.13676322526422, 'alpha': 0.9354382296285273, 'min_child_weight': 9}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:58:38,061] Trial 89 finished with value: 0.9074176864679065 and parameters: {'n_estimators': 650, 'max_depth': 12, 'learning_rate': 0.03342752951299893, 'subsample': 0.8706840216437864, 'colsample_bytree': 0.573895132362114, 'gamma': 2.184548585184946, 'lambda': 2.4285693878230563, 'alpha': 1.5178419883792926, 'min_child_weight': 5}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:59:10,420] Trial 90 finished with value: 0.9080985427335297 and parameters: {'n_estimators': 750, 'max_depth': 14, 'learning_rate': 0.019495858943082395, 'subsample': 0.7383895198177027, 'colsample_bytree': 0.5207094197190507, 'gamma': 1.4887146246592788, 'lambda': 1.9433260582653789, 'alpha': 0.13235471016593264, 'min_child_weight': 7}. Best is trial 61 with value: 0.9081425139435153.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 01:59:43,890] Trial 91 finished with value: 0.9082859238683847 and parameters: {'n_estimators': 750, 'max_depth': 14, 'learning_rate': 0.01825270924751515, 'subsample': 0.7340677004853492, 'colsample_bytree': 0.5125729570251544, 'gamma': 1.5197921362681555, 'lambda': 1.8940631696982866, 'alpha': 0.01734993356678516, 'min_child_weight': 7}. Best is trial 91 with value: 0.9082859238683847.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 02:00:18,636] Trial 92 finished with value: 0.9080547216030147 and parameters: {'n_estimators': 800, 'max_depth': 13, 'learning_rate': 0.019404942201004482, 'subsample': 0.7356851005820221, 'colsample_bytree': 0.5108934566393454, 'gamma': 1.4228831676129605, 'lambda': 1.9014009034734647, 'alpha': 0.026091302751601875, 'min_child_weight': 6}. Best is trial 91 with value: 0.9082859238683847.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 02:00:51,827] Trial 93 finished with value: 0.907950181223554 and parameters: {'n_estimators': 800, 'max_depth': 13, 'learning_rate': 0.01777601259727703, 'subsample': 0.7597768326952247, 'colsample_bytree': 0.5129831247620176, 'gamma': 1.7579923044446455, 'lambda': 1.6856072633420676, 'alpha': 0.05666922679608888, 'min_child_weight': 6}. Best is trial 91 with value: 0.9082859238683847.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 02:01:27,891] Trial 94 finished with value: 0.9078878439519313 and parameters: {'n_estimators': 850, 'max_depth': 13, 'learning_rate': 0.01763432099927913, 'subsample': 0.7441127013013387, 'colsample_bytree': 0.5136949077972554, 'gamma': 1.4710367439475645, 'lambda': 1.7350772983434244, 'alpha': 0.10950077060326863, 'min_child_weight': 6}. Best is trial 91 with value: 0.9082859238683847.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 02:01:58,410] Trial 95 finished with value: 0.9077340771603857 and parameters: {'n_estimators': 750, 'max_depth': 12, 'learning_rate': 0.019474906014748182, 'subsample': 0.7561477335727855, 'colsample_bytree': 0.5162155838928143, 'gamma': 1.754361308424574, 'lambda': 1.6127692189567095, 'alpha': 0.5290681844955896, 'min_child_weight': 7}. Best is trial 91 with value: 0.9082859238683847.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 02:02:33,051] Trial 96 finished with value: 0.9081078120427537 and parameters: {'n_estimators': 800, 'max_depth': 13, 'learning_rate': 0.014889667293243695, 'subsample': 0.7686726979027334, 'colsample_bytree': 0.5000754469552408, 'gamma': 1.6136292311962146, 'lambda': 2.06264292310525, 'alpha': 0.0030821183752487435, 'min_child_weight': 6}. Best is trial 91 with value: 0.9082859238683847.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 02:03:09,038] Trial 97 finished with value: 0.9075661873373906 and parameters: {'n_estimators': 800, 'max_depth': 13, 'learning_rate': 0.011719695924630675, 'subsample': 0.7691260048941203, 'colsample_bytree': 0.5474365001706301, 'gamma': 1.684443566945334, 'lambda': 2.9382781300595164, 'alpha': 0.28313161056666897, 'min_child_weight': 6}. Best is trial 91 with value: 0.9082859238683847.


  "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 0.3),


[I 2025-02-16 02:03:44,742] Trial 98 finished with value: 0.907206314749328 and parameters: {'n_estimators': 850, 'max_depth': 11, 'learning_rate': 0.01267360281060405, 'subsample': 0.7291326974406264, 'colsample_bytree': 0.5226159497243503, 'gamma': 1.5456631099546525, 'lambda': 2.431034117117983, 'alpha': 0.6337364902124597, 'min_child_weight': 6}. Best is trial 91 with value: 0.9082859238683847.


Best trial: 91. Best value: 0.908286: 100%|██████████| 100/100 [36:53<00:00, 22.13s/it]

[I 2025-02-16 02:04:22,808] Trial 99 finished with value: 0.9076724591636441 and parameters: {'n_estimators': 900, 'max_depth': 12, 'learning_rate': 0.015282505220064399, 'subsample': 0.7055903164112534, 'colsample_bytree': 0.5325557988623328, 'gamma': 1.3258005979879934, 'lambda': 3.3636225485788467, 'alpha': 0.18672967618162783, 'min_child_weight': 5}. Best is trial 91 with value: 0.9082859238683847.





In [None]:
import joblib

# Optuna 최적화 실행 후 최적의 study 저장
joblib.dump(study, "xgb_optuna_study.pkl")


['xgb_optuna_study.pkl']

In [49]:
import joblib

# 저장된 Optuna study 불러오기
study = joblib.load("xgb_optuna_study.pkl")

# 최적의 하이퍼파라미터 가져오기
best_params = study.best_params
print("Best Params:", best_params)

# 최적의 하이퍼파라미터로 모델 학습
model = xgb.XGBClassifier(**best_params, random_state=42, n_jobs=-1)
model.fit(X_train_encoded, y)


FileNotFoundError: [Errno 2] No such file or directory: 'xgb_optuna_study.pkl'

In [69]:
# 최적 하이퍼파라미터 가져오기
best_params = study.best_params

# 최적의 파라미터로 모델 재학습
model = xgb.XGBClassifier(
    **best_params,
    objective="binary:logistic",
    eval_metric="auc",
    use_label_encoder=False,
    verbosity=0,
    random_state=42,
    n_jobs=-1
)

In [70]:
model.fit(X_train_resampled, y_resampled)

In [71]:
from sklearn.metrics import accuracy_score, roc_auc_score

# 예측
y_train_pred = model.predict(X_train_encoded)
y_train_proba = model.predict_proba(X_train_encoded)[:, 1]  # ROC-AUC Score 계산용

# 평가
accuracy = accuracy_score(y, y_train_pred)
roc_auc = roc_auc_score(y, y_train_proba)

# 출력
print(f"Accuracy: {accuracy:.4f}")
print(f"ROC-AUC Score: {roc_auc:.4f}")

Accuracy: 0.7784
ROC-AUC Score: 0.8090


RandomForest (Hyperparameter tune)

In [52]:
model.save_model("xgboost_SMOTE_model.json")

### Predict

In [72]:
pred_proba = model.predict_proba(X_test_encoded)[:, 1]

### Submission

In [73]:
sample_submission = pd.read_csv('./Data/sample_submission.csv')
sample_submission['probability'] = pred_proba

In [75]:
sample_submission.to_csv('./XGBoost_SMOTE3_optimization_submit.csv', index=False)