In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install optuna catboost

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna, catboost
Successfully installed catboost-1.2.8 colorlog-6.10.1 optuna-4.5.0


In [3]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
import glob
import random
import os
import matplotlib.pyplot as plt
import seaborn as sns
import ast
from functools import reduce
import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import ParameterSampler
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

import optuna
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline as imbalanced_make_pipeline

from lightgbm import LGBMClassifier
from lightgbm import early_stopping, log_evaluation
from xgboost import XGBClassifier
from catboost import CatBoostClassifier


from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingClassifier

from tqdm import tqdm
from collections import defaultdict
from itertools import combinations

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# from pytorch_tabnet.tab_network import TabNetEncoder

In [4]:
seed = 810

def set_seed(sd):
    random.seed(sd)
    np.random.seed(sd)
    os.environ['PYTHONHASHSEED'] = str(sd)
    random.seed(sd)
    np.random.seed(sd)
    torch.manual_seed(sd)
    torch.cuda.manual_seed_all(sd)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(seed)

In [5]:
data_dir = '/content/drive/MyDrive/ETRI_휴먼이해_인공지능/ETRI_lifelog_dataset/'

In [6]:
Q1_Q3 = pd.read_csv(data_dir + 'prep_df_Q1-3.csv')
S1_S2 = pd.read_csv(data_dir + 'prep_df_S1-2.csv')
S3 = pd.read_csv(data_dir + 'prep_df_S3.csv')

pred_df_list = [Q1_Q3,S1_S2,S3]
prep_df = reduce(lambda left, right: pd.merge(left, right, on=['subject_id', 'date'], how='outer'), pred_df_list)

metrics_train = pd.read_csv(data_dir + 'ch2025_metrics_train.csv')
sample_submission = pd.read_csv(data_dir + 'ch2025_submission_sample.csv')

In [7]:
prep_df.shape

(806, 432)

# train/test 데이터셋

In [8]:
# datetime 형으로 변환
metrics_train['lifelog_date'] = pd.to_datetime(metrics_train['lifelog_date']).dt.date
sample_submission['lifelog_date'] = pd.to_datetime(sample_submission['lifelog_date']).dt.date
prep_df['date'] = pd.to_datetime(prep_df['date']).dt.date
# prep_df.loc[:, 'date'] = pd.to_datetime(prep_df.loc[:, 'date']).dt.date

In [9]:
# train_df
metrics_train_renamed = metrics_train.rename(columns = {'lifelog_date': 'date'})

train_df = pd.merge(metrics_train_renamed, prep_df, on=['subject_id', 'date'], how='inner')
print(train_df.shape)

(450, 439)


In [10]:
# test_df
sample_submission_renamed = sample_submission.rename(columns = {'lifelog_date': 'date'})

test_df = pd.merge(sample_submission_renamed, prep_df, on=['subject_id', 'date'], how='inner')
print(test_df.shape)

(250, 439)


In [11]:
# def clear_columns_names(df):
#   df.columns = (df.columns
#                 .str.replace(r"[^\w]", "_", regex=True)
#                 .str.replace(r'__+','_', regex=True)
#                 .str.strip('_')
#                 )
#   return df

# train_df = clear_columns_names(train_df)
# test_df = clear_columns_names(test_df)

# X/y 설정

In [12]:
x = train_df.drop(columns=['subject_id', 'sleep_date', 'date', 'Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3'])
y = train_df[['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3']]
test_x = test_df[x.columns]

x.fillna(0, inplace=True)
y.fillna(0, inplace=True)
test_x.fillna(0, inplace=True)

In [13]:
print('train shape:', train_df.shape)
print('train X shape:', x.shape)
print('train y shape:', y.shape)
print()
print('test X shape:', test_df.shape)
print('test shape:', test_x.shape)

train shape: (450, 439)
train X shape: (450, 430)
train y shape: (450, 6)

test X shape: (250, 439)
test shape: (250, 430)


In [14]:
def find_duplicate(df):
  return df.columns[df.columns.duplicated()].tolist()

dup_x = find_duplicate(x)
dup_test_x = find_duplicate(test_x)
print('x_중복컬럼:', dup_x)
print('test_x_중복컬럼:', dup_test_x)

x_columns = set(x.columns)
test_x_columns = set(test_x.columns)

only_x = x_columns - test_x_columns
only_test_x = test_x_columns - x_columns
print('only_x:', only_x)
print('only_x_test:', only_test_x)

x_중복컬럼: []
test_x_중복컬럼: []
only_x: set()
only_x_test: set()


# 종속변수별 features

In [15]:
# features = {
#     'Q1_feat': x.columns[x.columns.str.startswith('Q1')],
#     'Q2_feat': x.columns[x.columns.str.startswith('Q2')],
#     'Q3_feat': x.columns[x.columns.str.startswith('Q2')],
#     'S1_feat': x.columns[x.columns.str.startswith('S1')],
#     'S2_feat': x.columns[x.columns.str.startswith('S1')],
#     'S3_feat': x.columns[x.columns.str.startswith('S3')]
# }

# target_to_feat = {
#     'Q1': features['Q1_feat'],
#     'Q2': features['Q2_feat'],
#     'Q3': features['Q3_feat'],
#     'S1': features['S1_feat'],
#     'S2': features['S2_feat'],
#     'S3': features['S3_feat']
# }

In [16]:
feat = {
    'Q1': 'Q1',
    'Q2': 'Q2',
    'Q3': 'Q2',
    'S1': 'S1',
    'S2': 'S1',
    'S3': 'S3'
}

features = {k: x.columns[x.columns.str.startswith(v)] for k, v in feat.items()}

# 데이터 분할

In [17]:
targets_binary = ['Q1', 'Q2', 'Q3', 'S2', 'S3']
target_multi = ['S1']

split_data = {}
for col in targets_binary + target_multi:
    y = train_df[col]
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, stratify=y, random_state=seed)
    split_data[col] = (x_train, x_val, y_train, y_val)

# smote + lgbm


In [18]:
# def check_smote_sample(split_data, col):
#     x_train, _, y_train, _ = split_data[col]

#     smote = SMOTE(sampling_strategy='minority', random_state=seed)

#     skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
#     for train_idx, _ in skf.split(x_train, y_train):
#         x_fold, y_fold = x_train.iloc[train_idx], y_train.iloc[train_idx]
#         x_resampled, y_resampled = smote.fit_resample(x_fold, y_fold)

#         print(f"Before SMOTE: {y_fold.value_counts().to_dict()}")
#         print(f"After SMOTE: {pd.Series(y_resampled).value_counts().to_dict()}")
#         print(f"x_resampled shape: {x_resampled.shape}")

#         break  # 한 fold만 확인

In [19]:
lgbm_best_param_dict = {}

def smote_optuna_binary(split_date):
  for col in targets_binary:
    print(f'=== target: {col} ===')
    x_train, x_val, y_train, y_val = split_data[col]


    def objective_binary(trial):
      params = {
          'learning_rate': trial.suggest_float("lr", 1e-5, 1e-1, log=True),
          'n_estimators': trial.suggest_int('n_estimators', 50, 500),
          'max_depth': trial.suggest_int('max_depth', 2, 32),
          'num_leaves': trial.suggest_int('num_leaves', 16, 64),
          'subsample': trial.suggest_float('subsample', 0.5, 1.0),
          'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
          'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
          'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
          'path_smooth' : trial.suggest_loguniform('path_smooth', 1e-8, 1e-3),
          'num_leaves' : trial.suggest_int('num_leaves', 30, 200),
          'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
          'max_bin' : trial.suggest_int('max_bin', 100, 255),
          'feature_fraction' : trial.suggest_uniform('feature_fraction', 0.5, 0.9),
          'bagging_fraction' : trial.suggest_uniform('bagging_fraction', 0.5, 0.9),
          'random_state': seed,
          'n_jobs': -1,
          'verbosity': -1
      }

      feat = features[col]
      clf_binary = LGBMClassifier(**params)
      pipeline = imbalanced_make_pipeline(SMOTE(sampling_strategy='minority', random_state=seed), clf_binary)

      cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
      score = cross_val_score(pipeline, x_train[feat], y_train, cv=cv, scoring='f1').mean()

      trial.report(score, step=0)
      if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

      return score

    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed))
    study.optimize(objective_binary, n_trials=50)

    best_params = study.best_params
    best_score = study.best_value
    lgbm_best_param_dict[col] = best_params
    print(f"{col} 최적 파라미터: {best_params}")
    print(f"{col} 최고 F1 점수: {best_score:.4f}")
    print("=" * 50, "\n")

In [20]:
smote_optuna_binary(split_data)

[I 2025-10-24 06:23:21,817] A new study created in memory with name: no-name-3e228266-d16d-4362-843a-6f447d2c69f1


=== target: Q1 ===


[I 2025-10-24 06:23:23,557] Trial 0 finished with value: 0.5161165090159672 and parameters: {'lr': 1.5106249671860432e-05, 'n_estimators': 339, 'max_depth': 31, 'num_leaves': 17, 'subsample': 0.5023240118702073, 'colsample_bytree': 0.7526598267090864, 'reg_alpha': 0.6834515648539062, 'reg_lambda': 4.603537484050716, 'path_smooth': 4.1267228833656876e-08, 'min_data_in_leaf': 74, 'max_bin': 172, 'feature_fraction': 0.6605739639515698, 'bagging_fraction': 0.592452365530872}. Best is trial 0 with value: 0.5161165090159672.
[I 2025-10-24 06:23:25,830] Trial 1 finished with value: 0.509876431081947 and parameters: {'lr': 2.708916662683437e-05, 'n_estimators': 186, 'max_depth': 4, 'num_leaves': 63, 'subsample': 0.5372630344763973, 'colsample_bytree': 0.849442817276649, 'reg_alpha': 0.09266314028306984, 'reg_lambda': 1.1663679970572534, 'path_smooth': 2.0788347659089347e-06, 'min_data_in_leaf': 50, 'max_bin': 141, 'feature_fraction': 0.599451423457273, 'bagging_fraction': 0.7580210670788405}. 

Q1 최적 파라미터: {'lr': 1.1829240481692007e-05, 'n_estimators': 303, 'max_depth': 13, 'num_leaves': 32, 'subsample': 0.8986806250207752, 'colsample_bytree': 0.5781104978571359, 'reg_alpha': 0.09024901265702105, 'reg_lambda': 0.0014611929574279922, 'path_smooth': 0.00011137009223515029, 'min_data_in_leaf': 100, 'max_bin': 105, 'feature_fraction': 0.8522577269149394, 'bagging_fraction': 0.6861650259370463}
Q1 최고 F1 점수: 0.5766

=== target: Q2 ===


[I 2025-10-24 06:23:45,132] Trial 0 finished with value: 0.5892650878912111 and parameters: {'lr': 1.5106249671860432e-05, 'n_estimators': 339, 'max_depth': 31, 'num_leaves': 17, 'subsample': 0.5023240118702073, 'colsample_bytree': 0.7526598267090864, 'reg_alpha': 0.6834515648539062, 'reg_lambda': 4.603537484050716, 'path_smooth': 4.1267228833656876e-08, 'min_data_in_leaf': 74, 'max_bin': 172, 'feature_fraction': 0.6605739639515698, 'bagging_fraction': 0.592452365530872}. Best is trial 0 with value: 0.5892650878912111.
[I 2025-10-24 06:23:45,474] Trial 1 finished with value: 0.5852110001058486 and parameters: {'lr': 2.708916662683437e-05, 'n_estimators': 186, 'max_depth': 4, 'num_leaves': 63, 'subsample': 0.5372630344763973, 'colsample_bytree': 0.849442817276649, 'reg_alpha': 0.09266314028306984, 'reg_lambda': 1.1663679970572534, 'path_smooth': 2.0788347659089347e-06, 'min_data_in_leaf': 50, 'max_bin': 141, 'feature_fraction': 0.599451423457273, 'bagging_fraction': 0.7580210670788405}.

Q2 최적 파라미터: {'lr': 0.06497900379664574, 'n_estimators': 391, 'max_depth': 18, 'num_leaves': 51, 'subsample': 0.7582979267239988, 'colsample_bytree': 0.8651906198740047, 'reg_alpha': 0.11239070473982651, 'reg_lambda': 1.3050956051665916, 'path_smooth': 6.60351807814746e-06, 'min_data_in_leaf': 72, 'max_bin': 176, 'feature_fraction': 0.7304175673398711, 'bagging_fraction': 0.7446920499023151}
Q2 최고 F1 점수: 0.6655

=== target: Q3 ===


[I 2025-10-24 06:24:12,320] Trial 0 finished with value: 0.6000479754127162 and parameters: {'lr': 1.5106249671860432e-05, 'n_estimators': 339, 'max_depth': 31, 'num_leaves': 17, 'subsample': 0.5023240118702073, 'colsample_bytree': 0.7526598267090864, 'reg_alpha': 0.6834515648539062, 'reg_lambda': 4.603537484050716, 'path_smooth': 4.1267228833656876e-08, 'min_data_in_leaf': 74, 'max_bin': 172, 'feature_fraction': 0.6605739639515698, 'bagging_fraction': 0.592452365530872}. Best is trial 0 with value: 0.6000479754127162.
[I 2025-10-24 06:24:12,705] Trial 1 finished with value: 0.5904364210087103 and parameters: {'lr': 2.708916662683437e-05, 'n_estimators': 186, 'max_depth': 4, 'num_leaves': 63, 'subsample': 0.5372630344763973, 'colsample_bytree': 0.849442817276649, 'reg_alpha': 0.09266314028306984, 'reg_lambda': 1.1663679970572534, 'path_smooth': 2.0788347659089347e-06, 'min_data_in_leaf': 50, 'max_bin': 141, 'feature_fraction': 0.599451423457273, 'bagging_fraction': 0.7580210670788405}.

Q3 최적 파라미터: {'lr': 0.013604904210327118, 'n_estimators': 430, 'max_depth': 24, 'num_leaves': 19, 'subsample': 0.5645433380750695, 'colsample_bytree': 0.593252942688302, 'reg_alpha': 0.0013643575284378597, 'reg_lambda': 0.2438984658544583, 'path_smooth': 1.8137214232302684e-05, 'min_data_in_leaf': 49, 'max_bin': 208, 'feature_fraction': 0.8649173762764597, 'bagging_fraction': 0.8797974768111675}
Q3 최고 F1 점수: 0.6428

=== target: S2 ===


[I 2025-10-24 06:24:40,149] Trial 0 finished with value: 0.6449907618194064 and parameters: {'lr': 1.5106249671860432e-05, 'n_estimators': 339, 'max_depth': 31, 'num_leaves': 17, 'subsample': 0.5023240118702073, 'colsample_bytree': 0.7526598267090864, 'reg_alpha': 0.6834515648539062, 'reg_lambda': 4.603537484050716, 'path_smooth': 4.1267228833656876e-08, 'min_data_in_leaf': 74, 'max_bin': 172, 'feature_fraction': 0.6605739639515698, 'bagging_fraction': 0.592452365530872}. Best is trial 0 with value: 0.6449907618194064.
[I 2025-10-24 06:24:40,549] Trial 1 finished with value: 0.6770066936054872 and parameters: {'lr': 2.708916662683437e-05, 'n_estimators': 186, 'max_depth': 4, 'num_leaves': 63, 'subsample': 0.5372630344763973, 'colsample_bytree': 0.849442817276649, 'reg_alpha': 0.09266314028306984, 'reg_lambda': 1.1663679970572534, 'path_smooth': 2.0788347659089347e-06, 'min_data_in_leaf': 50, 'max_bin': 141, 'feature_fraction': 0.599451423457273, 'bagging_fraction': 0.7580210670788405}.

S2 최적 파라미터: {'lr': 6.241834334580301e-05, 'n_estimators': 86, 'max_depth': 15, 'num_leaves': 33, 'subsample': 0.9989160263482014, 'colsample_bytree': 0.8938211520593293, 'reg_alpha': 0.009407778153257707, 'reg_lambda': 0.002673757228442137, 'path_smooth': 0.00013962002606948107, 'min_data_in_leaf': 36, 'max_bin': 148, 'feature_fraction': 0.747759888683967, 'bagging_fraction': 0.5525298429799801}
S2 최고 F1 점수: 0.7382

=== target: S3 ===


[I 2025-10-24 06:25:11,454] Trial 0 finished with value: 0.6721487363011842 and parameters: {'lr': 1.5106249671860432e-05, 'n_estimators': 339, 'max_depth': 31, 'num_leaves': 17, 'subsample': 0.5023240118702073, 'colsample_bytree': 0.7526598267090864, 'reg_alpha': 0.6834515648539062, 'reg_lambda': 4.603537484050716, 'path_smooth': 4.1267228833656876e-08, 'min_data_in_leaf': 74, 'max_bin': 172, 'feature_fraction': 0.6605739639515698, 'bagging_fraction': 0.592452365530872}. Best is trial 0 with value: 0.6721487363011842.
[I 2025-10-24 06:25:11,863] Trial 1 finished with value: 0.6900624286949082 and parameters: {'lr': 2.708916662683437e-05, 'n_estimators': 186, 'max_depth': 4, 'num_leaves': 63, 'subsample': 0.5372630344763973, 'colsample_bytree': 0.849442817276649, 'reg_alpha': 0.09266314028306984, 'reg_lambda': 1.1663679970572534, 'path_smooth': 2.0788347659089347e-06, 'min_data_in_leaf': 50, 'max_bin': 141, 'feature_fraction': 0.599451423457273, 'bagging_fraction': 0.7580210670788405}.

S3 최적 파라미터: {'lr': 0.011521486437934367, 'n_estimators': 297, 'max_depth': 31, 'num_leaves': 32, 'subsample': 0.6876643499326835, 'colsample_bytree': 0.9988460215873797, 'reg_alpha': 0.001567427707805425, 'reg_lambda': 0.0031564855329310735, 'path_smooth': 5.184842849523365e-05, 'min_data_in_leaf': 16, 'max_bin': 243, 'feature_fraction': 0.5385931013332868, 'bagging_fraction': 0.5609535482447322}
S3 최고 F1 점수: 0.7545



In [21]:
def smote_optuna_multi(split_date):
  for col in target_multi:
    print(f'=== target: {col} ===')
    x_train, x_val, y_train, y_val = split_data[col]

    def objective_multi(trial):
      params = {
          'learning_rate': trial.suggest_float("lr", 1e-5, 1e-1, log=True),
          'n_estimators': trial.suggest_int('n_estimators', 50, 500),
          'max_depth': trial.suggest_int('max_depth', 2, 32),
          'num_leaves': trial.suggest_int('num_leaves', 16, 64),
          'subsample': trial.suggest_float('subsample', 0.5, 1.0),
          'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
          'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
          'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
          'path_smooth' : trial.suggest_loguniform('path_smooth', 1e-8, 1e-3),
          'num_leaves' : trial.suggest_int('num_leaves', 30, 200),
          'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
          'max_bin' : trial.suggest_int('max_bin', 100, 255),
          'feature_fraction' : trial.suggest_uniform('feature_fraction', 0.5, 0.9),
          'bagging_fraction' : trial.suggest_uniform('bagging_fraction', 0.5, 0.9),
          'random_state': seed,
          'n_jobs': -1,
          'verbosity': -1,
          'objective': 'multiclass',
          'num_class': 3
      }

      feat = features[col]

      clf_binary = LGBMClassifier(**params)
      pipeline = imbalanced_make_pipeline(SMOTE(sampling_strategy='not majority', random_state=seed), clf_binary)

      cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
      score_macro = cross_val_score(pipeline, x_train, y_train, cv=cv, scoring='f1_macro').mean()

      trial.report(score_macro, step=0)
      if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

      return score_macro

    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed))
    study.optimize(objective_multi, n_trials=50)

    best_params = study.best_params
    best_score = study.best_value
    lgbm_best_param_dict[col] = best_params
    print(f"{col} 최적 파라미터: {best_params}")
    print(f"{col} 최고 F1 점수: {best_score:.4f}")
    print("=" * 50, "\n")

In [22]:
smote_optuna_multi(split_data)

[I 2025-10-24 06:26:09,037] A new study created in memory with name: no-name-69c0727a-3c6e-4b30-9da6-72b4b46d5055


=== target: S1 ===


[I 2025-10-24 06:26:11,400] Trial 0 finished with value: 0.3826401341704198 and parameters: {'lr': 1.5106249671860432e-05, 'n_estimators': 339, 'max_depth': 31, 'num_leaves': 17, 'subsample': 0.5023240118702073, 'colsample_bytree': 0.7526598267090864, 'reg_alpha': 0.6834515648539062, 'reg_lambda': 4.603537484050716, 'path_smooth': 4.1267228833656876e-08, 'min_data_in_leaf': 74, 'max_bin': 172, 'feature_fraction': 0.6605739639515698, 'bagging_fraction': 0.592452365530872}. Best is trial 0 with value: 0.3826401341704198.
[I 2025-10-24 06:26:13,686] Trial 1 finished with value: 0.40401065451483265 and parameters: {'lr': 2.708916662683437e-05, 'n_estimators': 186, 'max_depth': 4, 'num_leaves': 63, 'subsample': 0.5372630344763973, 'colsample_bytree': 0.849442817276649, 'reg_alpha': 0.09266314028306984, 'reg_lambda': 1.1663679970572534, 'path_smooth': 2.0788347659089347e-06, 'min_data_in_leaf': 50, 'max_bin': 141, 'feature_fraction': 0.599451423457273, 'bagging_fraction': 0.7580210670788405}

S1 최적 파라미터: {'lr': 0.09452949172777327, 'n_estimators': 137, 'max_depth': 31, 'num_leaves': 45, 'subsample': 0.7304501634834658, 'colsample_bytree': 0.9826086606626508, 'reg_alpha': 0.003785001012054541, 'reg_lambda': 0.12532041335898567, 'path_smooth': 0.0006079042464351565, 'min_data_in_leaf': 69, 'max_bin': 112, 'feature_fraction': 0.7634096253090628, 'bagging_fraction': 0.8655379652078898}
S1 최고 F1 점수: 0.4673



In [23]:
fix_param_binary = {'random_state': seed,
                    'n_jobs': -1,
                    'verbosity': -1
                    }

fix_param_multi = {'random_sate': seed,
                   'n_jobs': -1,
                   'verbosity': -1,
                   'objective': 'multiclass',
                   'num_class': 3
                   }

In [24]:
def lgbm_smote(split_data, lgbm_best_param_dict, test_x):
  preds = {}

  for col_binary in targets_binary:
    print(col_binary)
    x_train, x_val, y_train, y_val = split_data[col_binary]
    best_param = lgbm_best_param_dict[col_binary]
    feat = features[col_binary]

    clf_binary = LGBMClassifier(**best_param, **fix_param_binary)

    smote = SMOTE(sampling_strategy='minority', random_state=seed)
    x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

    clf_binary.fit(
        x_train_over, y_train_over,
        eval_set=[(x_val[feat], y_val)],
        callbacks=[early_stopping(10), log_evaluation(0)],
        eval_metric='logloss'
    )

    y_pred = clf_binary.predict(x_val[feat])

    f1 = f1_score(y_val, y_pred)
    train_acc = clf_binary.score(x_train[feat], y_train)
    val_acc = clf_binary.score(x_val[feat], y_val)
    print(f'{col_binary} -> Train Acc: {train_acc:4f} | Validation Accucary: {val_acc:.4f} | Validation F1_score: {f1:.4f}')
    print()

    preds[col_binary] = clf_binary.predict(test_x[feat])

  for col_multi in target_multi:
    print(col_multi)
    x_train, x_val, y_train, y_val = split_data[col_multi]
    best_param = lgbm_best_param_dict[col_multi]
    feat = features[col_multi]

    clf_multi = LGBMClassifier(**best_param, **fix_param_multi)

    smote = SMOTE(sampling_strategy='not majority', random_state=seed)
    x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

    clf_multi.fit(
        x_train_over, y_train_over,
        eval_set=[(x_val[feat], y_val)],
        callbacks=[early_stopping(10), log_evaluation(0)],
        eval_metric='multi_logloss'
    )

    y_pred = clf_multi.predict(x_val[feat])

    f1 = f1_score(y_val, y_pred, average='macro')
    train_acc = clf_multi.score(x_train[feat], y_train)
    val_acc = clf_multi.score(x_val[feat], y_val)
    print(f'{col_multi} -> Train Acc: {train_acc:4f} | Validation Accucary: {val_acc:.4f} | Validation F1_score: {f1:.4f}')
    print()

    preds[col_multi] = clf_multi.predict(test_x[feat])

  return preds

In [25]:
lgbm_best_param_dict

{'Q1': {'lr': 1.1829240481692007e-05,
  'n_estimators': 303,
  'max_depth': 13,
  'num_leaves': 32,
  'subsample': 0.8986806250207752,
  'colsample_bytree': 0.5781104978571359,
  'reg_alpha': 0.09024901265702105,
  'reg_lambda': 0.0014611929574279922,
  'path_smooth': 0.00011137009223515029,
  'min_data_in_leaf': 100,
  'max_bin': 105,
  'feature_fraction': 0.8522577269149394,
  'bagging_fraction': 0.6861650259370463},
 'Q2': {'lr': 0.06497900379664574,
  'n_estimators': 391,
  'max_depth': 18,
  'num_leaves': 51,
  'subsample': 0.7582979267239988,
  'colsample_bytree': 0.8651906198740047,
  'reg_alpha': 0.11239070473982651,
  'reg_lambda': 1.3050956051665916,
  'path_smooth': 6.60351807814746e-06,
  'min_data_in_leaf': 72,
  'max_bin': 176,
  'feature_fraction': 0.7304175673398711,
  'bagging_fraction': 0.7446920499023151},
 'Q3': {'lr': 0.013604904210327118,
  'n_estimators': 430,
  'max_depth': 24,
  'num_leaves': 19,
  'subsample': 0.5645433380750695,
  'colsample_bytree': 0.593252

In [26]:
preds_lgbm = lgbm_smote(split_data, lgbm_best_param_dict, test_x)

Q1
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[27]	valid_0's binary_logloss: 0.678595
Q1 -> Train Acc: 0.636111 | Validation Accucary: 0.5333 | Validation F1_score: 0.5532

Q2
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's binary_logloss: 0.695147
Q2 -> Train Acc: 0.613889 | Validation Accucary: 0.4444 | Validation F1_score: 0.3056

Q3
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[42]	valid_0's binary_logloss: 0.628176
Q3 -> Train Acc: 0.847222 | Validation Accucary: 0.6333 | Validation F1_score: 0.7130

S2
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[22]	valid_0's binary_logloss: 0.622367
S2 -> Train Acc: 0.905556 | Validation Accucary: 0.6111 | Validation F1_score: 0.6847

S3
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is

# smote + xgb

In [27]:
xgb_best_param_dict = {}

def smote_optuna_binary(split_date):
  for col in targets_binary:
    print(f'=== target: {col} ===')
    x_train, x_val, y_train, y_val = split_data[col]


    def objective_binary(trial):
      params = {
        'learning_rate': trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-3, 10.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'random_state': seed,
        'n_jobs': -1,
        'verbosity': 0
        }
      feat = features[col]
      clf_binary = XGBClassifier(**params)
      pipeline = imbalanced_make_pipeline(SMOTE(sampling_strategy='minority', random_state=seed), clf_binary)

      cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
      score = cross_val_score(pipeline, x_train[feat], y_train, cv=cv, scoring='f1').mean()

      trial.report(score, step=0)
      if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

      return score

    study = optuna.create_study(direction="maximize")
    study.optimize(objective_binary, n_trials=50)

    best_params = study.best_params
    best_score = study.best_value
    xgb_best_param_dict[col] = best_params
    print(f"{col} 최적 파라미터: {best_params}")
    print(f"{col} 최고 F1 점수: {best_score:.4f}")
    print("=" * 50, "\n")

In [28]:
smote_optuna_binary(split_data)

[I 2025-10-24 06:29:33,293] A new study created in memory with name: no-name-2c4e8182-594d-45e8-845f-c0842878a214


=== target: Q1 ===


[I 2025-10-24 06:29:35,770] Trial 0 finished with value: 0.5534288081429756 and parameters: {'lr': 2.7082844995150635e-05, 'n_estimators': 64, 'max_depth': 9, 'subsample': 0.721288685250457, 'colsample_bytree': 0.6712866073648217, 'gamma': 0.007415442580646419, 'reg_alpha': 0.005665555708359325, 'reg_lambda': 0.7251860506765602}. Best is trial 0 with value: 0.5534288081429756.
[I 2025-10-24 06:29:37,453] Trial 1 finished with value: 0.5226837062146651 and parameters: {'lr': 0.02657395967129854, 'n_estimators': 390, 'max_depth': 10, 'subsample': 0.7565078982293211, 'colsample_bytree': 0.5227252726210436, 'gamma': 0.0056585913222279936, 'reg_alpha': 0.0017414163373460222, 'reg_lambda': 0.0090347455567185}. Best is trial 0 with value: 0.5534288081429756.
[I 2025-10-24 06:29:38,578] Trial 2 finished with value: 0.5069494261119082 and parameters: {'lr': 0.000743409664789288, 'n_estimators': 257, 'max_depth': 8, 'subsample': 0.8095410701450123, 'colsample_bytree': 0.8235830710528707, 'gamma'

Q1 최적 파라미터: {'lr': 0.005158227045382019, 'n_estimators': 500, 'max_depth': 6, 'subsample': 0.5014653515689952, 'colsample_bytree': 0.9874942536147016, 'gamma': 0.05156440289130511, 'reg_alpha': 9.220831718455408, 'reg_lambda': 0.0735303974249241}
Q1 최고 F1 점수: 0.5820

=== target: Q2 ===


[I 2025-10-24 06:30:45,526] Trial 0 finished with value: 0.6260071505075107 and parameters: {'lr': 0.002666100488728505, 'n_estimators': 219, 'max_depth': 7, 'subsample': 0.633084841535613, 'colsample_bytree': 0.5607374309310948, 'gamma': 0.5101357845474125, 'reg_alpha': 0.0044860455270858066, 'reg_lambda': 0.020993523317422887}. Best is trial 0 with value: 0.6260071505075107.
[I 2025-10-24 06:30:46,252] Trial 1 finished with value: 0.6170434503767838 and parameters: {'lr': 0.053112930718709796, 'n_estimators': 257, 'max_depth': 5, 'subsample': 0.5717456553832535, 'colsample_bytree': 0.7639624404642442, 'gamma': 1.0572489304279908, 'reg_alpha': 3.394998253042146, 'reg_lambda': 0.3303834159537544}. Best is trial 0 with value: 0.6260071505075107.
[I 2025-10-24 06:30:47,515] Trial 2 finished with value: 0.5841203613175632 and parameters: {'lr': 0.0002905407582022655, 'n_estimators': 413, 'max_depth': 3, 'subsample': 0.9370034049195132, 'colsample_bytree': 0.9328962513898975, 'gamma': 0.00

Q2 최적 파라미터: {'lr': 0.007253718909191384, 'n_estimators': 432, 'max_depth': 3, 'subsample': 0.5038814517500667, 'colsample_bytree': 0.8406429214642972, 'gamma': 0.17523222455117982, 'reg_alpha': 1.5438489479832427, 'reg_lambda': 0.001091359355825587}
Q2 최고 F1 점수: 0.6479

=== target: Q3 ===


[I 2025-10-24 06:32:08,109] Trial 0 finished with value: 0.6629905416919522 and parameters: {'lr': 0.0008877293109075185, 'n_estimators': 368, 'max_depth': 12, 'subsample': 0.6370953879556039, 'colsample_bytree': 0.796547476177212, 'gamma': 0.0029177876285568093, 'reg_alpha': 0.0010536704045484907, 'reg_lambda': 0.00165288749320443}. Best is trial 0 with value: 0.6629905416919522.
[I 2025-10-24 06:32:08,991] Trial 1 finished with value: 0.6212131450798064 and parameters: {'lr': 0.024838799220349844, 'n_estimators': 185, 'max_depth': 5, 'subsample': 0.6018454568146132, 'colsample_bytree': 0.7960320839243126, 'gamma': 0.05270272431781748, 'reg_alpha': 2.0795925465995384, 'reg_lambda': 1.6175855423152434}. Best is trial 0 with value: 0.6629905416919522.
[I 2025-10-24 06:32:12,197] Trial 2 finished with value: 0.6318510949356896 and parameters: {'lr': 0.00951825647822062, 'n_estimators': 259, 'max_depth': 3, 'subsample': 0.8211952990360312, 'colsample_bytree': 0.930451957549183, 'gamma': 0

Q3 최적 파라미터: {'lr': 0.0004404216002912915, 'n_estimators': 324, 'max_depth': 12, 'subsample': 0.5226924038998063, 'colsample_bytree': 0.8331966130714386, 'gamma': 0.0015323702267678312, 'reg_alpha': 0.022735694948487355, 'reg_lambda': 0.0013044544681549623}
Q3 최고 F1 점수: 0.6689

=== target: S2 ===


[I 2025-10-24 06:33:46,864] Trial 0 finished with value: 0.7205888673973779 and parameters: {'lr': 0.0002410445304180822, 'n_estimators': 121, 'max_depth': 12, 'subsample': 0.6342449389334963, 'colsample_bytree': 0.6364833450977291, 'gamma': 0.12001712197350589, 'reg_alpha': 0.5462207482695721, 'reg_lambda': 0.4615812380119016}. Best is trial 0 with value: 0.7205888673973779.
[I 2025-10-24 06:33:49,263] Trial 1 finished with value: 0.6932139913004549 and parameters: {'lr': 0.0026438706878197517, 'n_estimators': 304, 'max_depth': 4, 'subsample': 0.9415615245475621, 'colsample_bytree': 0.5058472419283702, 'gamma': 0.015641755954584193, 'reg_alpha': 0.019936632076531482, 'reg_lambda': 0.08891632049178586}. Best is trial 0 with value: 0.7205888673973779.
[I 2025-10-24 06:33:58,510] Trial 2 finished with value: 0.7327681023533559 and parameters: {'lr': 0.013812471135604795, 'n_estimators': 497, 'max_depth': 9, 'subsample': 0.767585581375442, 'colsample_bytree': 0.7104172481824911, 'gamma': 

S2 최적 파라미터: {'lr': 0.04614071108938825, 'n_estimators': 450, 'max_depth': 8, 'subsample': 0.9421204688567754, 'colsample_bytree': 0.5304800114088037, 'gamma': 0.016060080731164703, 'reg_alpha': 0.1362755293398884, 'reg_lambda': 0.7591868489598357}
S2 최고 F1 점수: 0.7489

=== target: S3 ===


[I 2025-10-24 06:38:18,165] Trial 0 finished with value: 0.7052602753881526 and parameters: {'lr': 9.805567061846455e-05, 'n_estimators': 136, 'max_depth': 6, 'subsample': 0.8141797896619805, 'colsample_bytree': 0.5417581648233354, 'gamma': 0.0040033576686604525, 'reg_alpha': 7.870409830608495, 'reg_lambda': 0.7208497333798164}. Best is trial 0 with value: 0.7052602753881526.
[I 2025-10-24 06:38:22,749] Trial 1 finished with value: 0.7449329205366356 and parameters: {'lr': 1.0692275192049048e-05, 'n_estimators': 220, 'max_depth': 8, 'subsample': 0.6747293562596282, 'colsample_bytree': 0.8897429874136478, 'gamma': 0.0222258209058594, 'reg_alpha': 0.005702566816762267, 'reg_lambda': 0.001586406296534713}. Best is trial 1 with value: 0.7449329205366356.
[I 2025-10-24 06:38:25,302] Trial 2 finished with value: 0.6865963804873831 and parameters: {'lr': 2.0987372486824175e-05, 'n_estimators': 408, 'max_depth': 6, 'subsample': 0.5860427874824861, 'colsample_bytree': 0.6253011330443927, 'gamma

S3 최적 파라미터: {'lr': 0.0020760891608048683, 'n_estimators': 465, 'max_depth': 11, 'subsample': 0.6763670646201911, 'colsample_bytree': 0.7129825955543805, 'gamma': 0.002491652513169936, 'reg_alpha': 0.010094081014090846, 'reg_lambda': 0.02831915891392152}
S3 최고 F1 점수: 0.7648



In [29]:
def smote_optuna_multi(split_date):
  for col in target_multi:
    print(f'=== target: {col} ===')
    x_train, x_val, y_train, y_val = split_data[col]

    def objective_multi(trial):
      params = {'learning_rate': trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-3, 10.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'random_state': seed,
        'n_jobs': -1,
        'verbosity': 2,
        'objective': 'multiclass',
        'num_class': 3
                }
      feat = features[col]

      clf_binary = XGBClassifier(**params)
      pipeline = imbalanced_make_pipeline(SMOTE(sampling_strategy='not majority', random_state=seed), clf_binary)

      cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
      score_macro = cross_val_score(pipeline, x_train, y_train, cv=cv, scoring='f1_macro').mean()

      trial.report(score_macro, step=0)
      if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

      return score_macro

    study = optuna.create_study(direction="maximize")
    study.optimize(objective_multi, n_trials=50)

    best_params = study.best_params
    best_score = study.best_value
    xgb_best_param_dict[col] = best_params
    print(f"{col} 최적 파라미터: {best_params}")
    print(f"{col} 최고 F1 점수: {best_score:.4f}")
    print("=" * 50, "\n")

In [30]:
smote_optuna_multi(split_data)

[I 2025-10-24 06:43:50,729] A new study created in memory with name: no-name-3e1ef533-c877-44de-94dd-6a78eeb3f6b5


=== target: S1 ===
[06:43:50] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:44:01] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:44:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:44:23] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:44:31] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:44:42,503] Trial 0 finished with value: 0.40399263673863217 and parameters: {'lr': 0.00014281940155395315, 'n_estimators': 497, 'max_depth': 4, 'subsample': 0.8956681962941607, 'colsample_bytree': 0.8451898752107849, 'gamma': 1.0874641407119445, 'reg_alpha': 0.01805025184202124, 'reg_lambda': 5.75608892161817}. Best is trial 0 with value: 0.40399263673863217.


[06:44:42] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:44:50] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:44:56] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:45:05] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:45:11] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:45:19,733] Trial 1 finished with value: 0.40855220855681773 and parameters: {'lr': 0.0030425238224273494, 'n_estimators': 282, 'max_depth': 5, 'subsample': 0.8170044955249266, 'colsample_bytree': 0.6934582011355332, 'gamma': 0.18463023843735232, 'reg_alpha': 0.01723084519183287, 'reg_lambda': 3.737452869410432}. Best is trial 1 with value: 0.40855220855681773.


[06:45:19] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:45:30] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:45:41] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:45:50] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:46:01] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:46:12,861] Trial 2 finished with value: 0.4345088936755963 and parameters: {'lr': 0.000973387269552397, 'n_estimators': 349, 'max_depth': 11, 'subsample': 0.828358403539935, 'colsample_bytree': 0.5413167229163132, 'gamma': 0.1423720689247687, 'reg_alpha': 6.3383068489284, 'reg_lambda': 0.0024198436279509127}. Best is trial 2 with value: 0.4345088936755963.


[06:46:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:46:15] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:46:21] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:46:24] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:46:27] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:46:29,788] Trial 3 finished with value: 0.42166169409227033 and parameters: {'lr': 0.0015325540734799467, 'n_estimators': 174, 'max_depth': 6, 'subsample': 0.9010737473752246, 'colsample_bytree': 0.8004957483594911, 'gamma': 7.087785115496811, 'reg_alpha': 0.014010659212553796, 'reg_lambda': 0.002229610626367877}. Best is trial 2 with value: 0.4345088936755963.


[06:46:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:46:46] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:47:00] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:47:15] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:47:30] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:47:45,013] Trial 4 finished with value: 0.4412346171766114 and parameters: {'lr': 0.0030945877119748786, 'n_estimators': 334, 'max_depth': 11, 'subsample': 0.5331006231093145, 'colsample_bytree': 0.9312305076112499, 'gamma': 0.027678070607010517, 'reg_alpha': 0.013777644309204242, 'reg_lambda': 0.011198242583174664}. Best is trial 4 with value: 0.4412346171766114.


[06:47:45] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:47:54] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:48:05] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:48:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:48:21] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:48:31,231] Trial 5 finished with value: 0.4422522632008718 and parameters: {'lr': 0.016822994253461912, 'n_estimators': 458, 'max_depth': 11, 'subsample': 0.9403321660962451, 'colsample_bytree': 0.8614799387378269, 'gamma': 1.0189215040517856, 'reg_alpha': 0.3646495022496935, 'reg_lambda': 4.784078068897332}. Best is trial 5 with value: 0.4422522632008718.


[06:48:31] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:48:34] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:48:36] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:48:39] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:48:44] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:48:47,385] Trial 6 finished with value: 0.45535368009702204 and parameters: {'lr': 0.019690645838653882, 'n_estimators': 83, 'max_depth': 9, 'subsample': 0.6085034564361231, 'colsample_bytree': 0.6851814994506682, 'gamma': 0.0014058814360278238, 'reg_alpha': 0.0022597747975200266, 'reg_lambda': 1.9413575806725996}. Best is trial 6 with value: 0.45535368009702204.


[06:48:47] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:48:58] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:49:07] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:49:17] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:49:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:49:40,334] Trial 7 finished with value: 0.4376775085172146 and parameters: {'lr': 0.0037496604348903565, 'n_estimators': 282, 'max_depth': 8, 'subsample': 0.6763609816423746, 'colsample_bytree': 0.6199011154095346, 'gamma': 0.041350272878244616, 'reg_alpha': 0.04152953800493051, 'reg_lambda': 0.3060010965743098}. Best is trial 6 with value: 0.45535368009702204.


[06:49:40] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:49:45] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:49:53] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:49:58] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:06] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:50:13,340] Trial 8 finished with value: 0.45023299271919665 and parameters: {'lr': 2.1087303832515604e-05, 'n_estimators': 179, 'max_depth': 6, 'subsample': 0.5470547050405088, 'colsample_bytree': 0.91409009131873, 'gamma': 0.029774760465326185, 'reg_alpha': 0.0012199013737736934, 'reg_lambda': 7.813234619425836}. Best is trial 6 with value: 0.45535368009702204.


[06:50:13] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:20] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:50:28] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:33] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:41] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:50:47,518] Trial 9 pruned. 


[06:50:47] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:49] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:50:51] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:54] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:56] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:50:58,024] Trial 10 finished with value: 0.45623968794700503 and parameters: {'lr': 0.08859185318842767, 'n_estimators': 51, 'max_depth': 9, 'subsample': 0.658732262891285, 'colsample_bytree': 0.7064685420279516, 'gamma': 0.0011877283608786608, 'reg_alpha': 0.42286031491427334, 'reg_lambda': 0.0423429849605506}. Best is trial 10 with value: 0.45623968794700503.


[06:50:58] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:50:59] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:51:01] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:03] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:07] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:51:09,190] Trial 11 finished with value: 0.44579903178367264 and parameters: {'lr': 0.08153763482393175, 'n_estimators': 53, 'max_depth': 9, 'subsample': 0.6747033959887813, 'colsample_bytree': 0.7182939855514965, 'gamma': 0.0011885253257329308, 'reg_alpha': 0.27672779526018526, 'reg_lambda': 0.03616036742305682}. Best is trial 10 with value: 0.45623968794700503.


[06:51:09] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:10] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:51:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:13] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:15] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:51:16,867] Trial 12 finished with value: 0.4424291192237077 and parameters: {'lr': 0.08493356845302691, 'n_estimators': 53, 'max_depth': 9, 'subsample': 0.6444726908695082, 'colsample_bytree': 0.6483106727860879, 'gamma': 0.00508233210688347, 'reg_alpha': 2.0297577186003184, 'reg_lambda': 0.7755832362510479}. Best is trial 10 with value: 0.45623968794700503.


[06:51:16] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:23] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:51:28] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:34] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:39] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:51:46,222] Trial 13 finished with value: 0.46460401637957405 and parameters: {'lr': 0.022133791393320813, 'n_estimators': 139, 'max_depth': 8, 'subsample': 0.6138092569019429, 'colsample_bytree': 0.7681144934060891, 'gamma': 0.006299982727169865, 'reg_alpha': 0.24633071717464936, 'reg_lambda': 0.07069000899913704}. Best is trial 13 with value: 0.46460401637957405.


[06:51:46] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:51:51] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:51:58] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:04] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:52:17,563] Trial 14 pruned. 


[06:52:17] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:21] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:52:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:33] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:39] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:52:43,203] Trial 15 pruned. 


[06:52:43] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:45] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:52:48] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:53] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:52:55] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:52:58,552] Trial 16 pruned. 


[06:52:58] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:04] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:53:08] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:11] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:17] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:53:21,506] Trial 17 pruned. 


[06:53:21] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:24] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:53:26] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:32] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:34] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:53:37,699] Trial 18 pruned. 


[06:53:37] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:43] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:53:47] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:51] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:53:57] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:54:00,645] Trial 19 pruned. 


[06:54:00] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:03] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:54:06] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:11] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:14] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:54:16,888] Trial 20 pruned. 


[06:54:16] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:19] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:54:23] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:26] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:28] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:54:30,606] Trial 21 pruned. 


[06:54:30] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:36] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:54:40] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:43] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:49] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:54:53,001] Trial 22 pruned. 


[06:54:53] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:54:56] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:55:01] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:55:04] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:55:08] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:55:11,112] Trial 23 pruned. 


[06:55:11] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:55:18] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:55:23] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:55:31] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:55:36] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:55:44,053] Trial 24 finished with value: 0.46428418070464694 and parameters: {'lr': 0.029457203900776767, 'n_estimators': 219, 'max_depth': 9, 'subsample': 0.5046660627372277, 'colsample_bytree': 0.717950705320649, 'gamma': 0.007663229298155008, 'reg_alpha': 0.23690001536221472, 'reg_lambda': 0.2599161718726468}. Best is trial 13 with value: 0.46460401637957405.


[06:55:44] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:55:48] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:55:54] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:55:58] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:56:03] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:56:09,961] Trial 25 finished with value: 0.4427941492790407 and parameters: {'lr': 0.04379975308350594, 'n_estimators': 213, 'max_depth': 7, 'subsample': 0.5057819374541411, 'colsample_bytree': 0.7251361075524536, 'gamma': 0.0072077104323396155, 'reg_alpha': 0.203942588931663, 'reg_lambda': 0.21215817066703085}. Best is trial 13 with value: 0.46460401637957405.


[06:56:10] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:56:18] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:56:26] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:56:36] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:56:47] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:56:55,447] Trial 26 pruned. 


[06:56:55] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:57:02] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:57:07] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:57:14] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:57:19] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:57:26,275] Trial 27 finished with value: 0.4540102975824979 and parameters: {'lr': 0.027915868510955608, 'n_estimators': 147, 'max_depth': 10, 'subsample': 0.5663929835538588, 'colsample_bytree': 0.8856543358386744, 'gamma': 0.002838846817584386, 'reg_alpha': 0.6932812963917486, 'reg_lambda': 0.023246820151562375}. Best is trial 13 with value: 0.46460401637957405.


[06:57:26] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:57:31] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:57:39] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:57:44] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:57:51] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:57:56,993] Trial 28 pruned. 


[06:57:57] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:58:15] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:58:34] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:58:52] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:59:13] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:59:31,225] Trial 29 pruned. 


[06:59:31] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:59:37] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[06:59:41] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:59:44] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:59:51] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 06:59:54,764] Trial 30 finished with value: 0.4449438711852138 and parameters: {'lr': 0.005776956228611499, 'n_estimators': 243, 'max_depth': 4, 'subsample': 0.7806807671020184, 'colsample_bytree': 0.7467201738554423, 'gamma': 0.08153168638655209, 'reg_alpha': 0.03457955712419432, 'reg_lambda': 0.04405797755470329}. Best is trial 13 with value: 0.46460401637957405.


[06:59:54] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[06:59:57] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:00:03] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:06] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:09] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:00:14,310] Trial 31 pruned. 


[07:00:14] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:16] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:00:18] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:20] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:22] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:00:24,037] Trial 32 pruned. 


[07:00:24] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:00:33] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:37] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:43] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:00:46,425] Trial 33 finished with value: 0.4697647179300838 and parameters: {'lr': 0.05160665222603649, 'n_estimators': 156, 'max_depth': 9, 'subsample': 0.5821378530908934, 'colsample_bytree': 0.5855602464031099, 'gamma': 0.006690316987727416, 'reg_alpha': 0.38435427498777225, 'reg_lambda': 2.0606678446638806}. Best is trial 33 with value: 0.4697647179300838.


[07:00:46] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:49] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:00:54] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:00:58] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:01:01] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:01:04,364] Trial 34 finished with value: 0.44803414230584854 and parameters: {'lr': 0.062401051872323095, 'n_estimators': 159, 'max_depth': 10, 'subsample': 0.5642579061970014, 'colsample_bytree': 0.5672279464791637, 'gamma': 0.007538096427346508, 'reg_alpha': 0.40337140941002664, 'reg_lambda': 1.1507390057933022}. Best is trial 33 with value: 0.4697647179300838.


[07:01:04] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:01:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:01:16] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:01:22] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:01:26] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:01:30,429] Trial 35 pruned. 


[07:01:30] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:01:37] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:01:41] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:01:48] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:01:52] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:01:59,679] Trial 36 pruned. 


[07:01:59] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:02:02] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:02:05] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:02:07] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:02:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:02:15,689] Trial 37 pruned. 


[07:02:15] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:02:20] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:02:27] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:02:32] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:02:39] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:02:43,958] Trial 38 pruned. 


[07:02:44] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:03:07] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:03:27] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:03:50] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:04:14] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:04:37,538] Trial 39 pruned. 


[07:04:37] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:04:42] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:04:49] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:04:54] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:04:59] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:05:06,041] Trial 40 finished with value: 0.4512103702281478 and parameters: {'lr': 0.014467063329249009, 'n_estimators': 190, 'max_depth': 6, 'subsample': 0.7727298784323651, 'colsample_bytree': 0.6334962212534178, 'gamma': 0.05812256592802063, 'reg_alpha': 0.7806691971664603, 'reg_lambda': 0.0014610257096663766}. Best is trial 33 with value: 0.4697647179300838.


[07:05:06] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:05:09] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:05:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:05:17] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:05:20] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:05:24,077] Trial 41 pruned. 


[07:05:24] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:05:31] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:05:38] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:05:45] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:05:50] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:05:57,162] Trial 42 pruned. 


[07:05:57] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:05:59] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:06:01] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:03] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:08] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:06:10,417] Trial 43 pruned. 


[07:06:10] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:14] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:06:20] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:24] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:28] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:06:34,932] Trial 44 pruned. 


[07:06:35] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:37] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:06:38] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:40] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:42] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:06:47,070] Trial 45 pruned. 


[07:06:47] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:06:54] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:07:04] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:07:15] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:07:25] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:07:33,248] Trial 46 finished with value: 0.4624257231021997 and parameters: {'lr': 0.014627649930428355, 'n_estimators': 303, 'max_depth': 11, 'subsample': 0.5227900409656653, 'colsample_bytree': 0.7113587505226334, 'gamma': 0.12591196216520809, 'reg_alpha': 0.002806461429444396, 'reg_lambda': 0.22552642340264675}. Best is trial 33 with value: 0.4697647179300838.


[07:07:33] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:07:43] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:07:53] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:08:00] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:08:10] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:08:20,820] Trial 47 finished with value: 0.4539184172457745 and parameters: {'lr': 0.015162958945749339, 'n_estimators': 311, 'max_depth': 11, 'subsample': 0.5265335831513014, 'colsample_bytree': 0.7216835048429172, 'gamma': 0.14945717464867314, 'reg_alpha': 0.2937613318137597, 'reg_lambda': 0.29031391411004226}. Best is trial 33 with value: 0.4697647179300838.


[07:08:21] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:08:25] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:08:31] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:08:36] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:08:40] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:08:46,908] Trial 48 pruned. 


[07:08:47] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:08:53] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 430, 185760).
[07:09:03] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:09:12] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).
[07:09:19] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (429, 430, 184470).


[I 2025-10-24 07:09:29,055] Trial 49 pruned. 


S1 최적 파라미터: {'lr': 0.05160665222603649, 'n_estimators': 156, 'max_depth': 9, 'subsample': 0.5821378530908934, 'colsample_bytree': 0.5855602464031099, 'gamma': 0.006690316987727416, 'reg_alpha': 0.38435427498777225, 'reg_lambda': 2.0606678446638806}
S1 최고 F1 점수: 0.4698



In [31]:
fix_param_binary = {'random_state': seed,
                    'n_jobs': -1,
                    'verbosity': 2
                    }

fix_param_multi = {'random_sate': seed,
                   'n_jobs': -1,
                   'verbosity': 2,
                   'objective': 'multiclass',
                   'num_class': 3
                   }

In [32]:
import xgboost as xgb
xgb.set_config(verbosity=0)

In [33]:
def xgb_smote(split_data, xgb_best_param_dict, test_x):
  preds = {}

  for col_binary in targets_binary:
    print(col_binary)
    x_train, x_val, y_train, y_val = split_data[col_binary]
    best_param = xgb_best_param_dict[col_binary]
    feat = features[col_binary]

    clf_binary = XGBClassifier(**best_param, **fix_param_binary,
                               use_label_encoder=False
    )

    smote = SMOTE(sampling_strategy='minority', random_state=seed)
    x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

    clf_binary.fit(
        x_train_over, y_train_over,
        eval_set=[(x_val[feat], y_val)],
        verbose=False,
    )

    y_pred = clf_binary.predict(x_val[feat])

    f1 = f1_score(y_val, y_pred)
    train_acc = clf_binary.score(x_train[feat], y_train)
    val_acc = clf_binary.score(x_val[feat], y_val)
    print(f'{col_binary} -> Train Acc: {train_acc:4f} | Validation Accucary: {val_acc:.4f} | Validation F1_score: {f1:.4f}')
    print()

    preds[col_binary] = clf_binary.predict(test_x[feat])

  for col_multi in target_multi:
    print(col_multi)
    x_train, x_val, y_train, y_val = split_data[col_multi]
    best_param = xgb_best_param_dict[col_multi]
    feat = features[col_multi]

    clf_multi = XGBClassifier(**best_param, **fix_param_multi,
                              use_label_encoder=False
    )

    smote = SMOTE(sampling_strategy='not majority', random_state=seed)
    x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

    clf_multi.fit(
        x_train_over, y_train_over,
        eval_set=[(x_val[feat], y_val)],
        verbose=False,
    )

    y_pred = clf_multi.predict(x_val[feat])

    f1 = f1_score(y_val, y_pred, average='macro')
    train_acc = clf_multi.score(x_train[feat], y_train)
    val_acc = clf_multi.score(x_val[feat], y_val)
    print(f'{col_multi} -> Train Acc: {train_acc:4f} | Validation Accucary: {val_acc:.4f} | Validation F1_score: {f1:.4f}')
    print()

    preds[col_multi] = clf_multi.predict(test_x[feat])

  return preds

In [34]:
xgb_best_param_dict

{'Q1': {'lr': 0.005158227045382019,
  'n_estimators': 500,
  'max_depth': 6,
  'subsample': 0.5014653515689952,
  'colsample_bytree': 0.9874942536147016,
  'gamma': 0.05156440289130511,
  'reg_alpha': 9.220831718455408,
  'reg_lambda': 0.0735303974249241},
 'Q2': {'lr': 0.007253718909191384,
  'n_estimators': 432,
  'max_depth': 3,
  'subsample': 0.5038814517500667,
  'colsample_bytree': 0.8406429214642972,
  'gamma': 0.17523222455117982,
  'reg_alpha': 1.5438489479832427,
  'reg_lambda': 0.001091359355825587},
 'Q3': {'lr': 0.0004404216002912915,
  'n_estimators': 324,
  'max_depth': 12,
  'subsample': 0.5226924038998063,
  'colsample_bytree': 0.8331966130714386,
  'gamma': 0.0015323702267678312,
  'reg_alpha': 0.022735694948487355,
  'reg_lambda': 0.0013044544681549623},
 'S2': {'lr': 0.04614071108938825,
  'n_estimators': 450,
  'max_depth': 8,
  'subsample': 0.9421204688567754,
  'colsample_bytree': 0.5304800114088037,
  'gamma': 0.016060080731164703,
  'reg_alpha': 0.1362755293398

In [35]:
preds = xgb_smote(split_data, xgb_best_param_dict, test_x)

Q1
[07:09:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (364, 105, 38220).
[07:09:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (90, 105, 9450).
Q1 -> Train Acc: 0.766667 | Validation Accucary: 0.6222 | Validation F1_score: 0.6222

Q2
[07:09:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (404, 105, 42420).
[07:09:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (90, 105, 9450).
Q2 -> Train Acc: 1.000000 | Validation Accucary: 0.5556 | Validation F1_score: 0.5455

Q3
[07:09:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (432, 105, 45360).
[07:09:29] INFO: /workspace/src/data/iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (90, 105, 9450).
Q3 -> Train Acc: 1.000000 | Validation Accucary: 0.6556 | Val

# smote + catb

In [36]:
catb_best_param_dict = {}

def smote_optuna_binary(split_date):
  for col in targets_binary:
    print(f'=== target: {col} ===')
    x_train, x_val, y_train, y_val = split_data[col]


    def objective_binary(trial):
      params = {'iterations' :trial.suggest_int("iterations", 100, 1000),
        'learning_rate': trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True),
        'depth': trial.suggest_int("depth", 4, 10),
        'l2_leaf_reg': trial.suggest_float("l2_leaf_reg", 1e-3, 100.0, log=True),
        'bootstrap_type': trial.suggest_categorical("bootstrap_type", ["Bayesian"]),
        'random_strength': trial.suggest_float("random_strength", 1e-3, 10.0, log=True),
        'bagging_temperature': trial.suggest_float("bagging_temperature", 0.0, 10.0),
        'od_type': trial.suggest_categorical("od_type", ["IncToDec", "Iter"]),
        'od_wait': trial.suggest_int("od_wait", 10, 50),
        'border_count': trial.suggest_int("border_count", 32, 255),
        'random_state': seed,
        'verbose': 0
                }

      feat = features[col]
      clf_binary = CatBoostClassifier(**params, task_type="GPU")
      pipeline = imbalanced_make_pipeline(SMOTE(sampling_strategy='minority', random_state=seed), clf_binary)

      cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
      score = cross_val_score(pipeline, x_train[feat], y_train, cv=cv, scoring='f1').mean()

      trial.report(score, step=0)
      if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

      return score

    study = optuna.create_study(direction="maximize")
    study.optimize(objective_binary, n_trials=50)

    best_params = study.best_params
    best_score = study.best_value
    catb_best_param_dict[col] = best_params
    print(f"{col} 최적 파라미터: {best_params}")
    print(f"{col} 최고 F1 점수: {best_score:.4f}")
    print("=" * 50, "\n")

In [None]:
smote_optuna_binary(split_data)

[I 2025-10-24 07:09:32,507] A new study created in memory with name: no-name-5b3826f2-4b8d-42d7-bcf3-98a471eb9794


=== target: Q1 ===


[I 2025-10-24 07:10:35,511] Trial 0 finished with value: 0.5559356190988981 and parameters: {'iterations': 701, 'learning_rate': 1.4583258437369743e-05, 'depth': 5, 'l2_leaf_reg': 3.6772690759626188, 'bootstrap_type': 'Bayesian', 'random_strength': 0.0017646130726175287, 'bagging_temperature': 7.627793722370252, 'od_type': 'IncToDec', 'od_wait': 34, 'border_count': 178}. Best is trial 0 with value: 0.5559356190988981.
[I 2025-10-24 07:12:28,651] Trial 1 finished with value: 0.5520092566532195 and parameters: {'iterations': 678, 'learning_rate': 0.00041784213762068414, 'depth': 7, 'l2_leaf_reg': 0.5199262345560142, 'bootstrap_type': 'Bayesian', 'random_strength': 4.7459259525908575, 'bagging_temperature': 3.384806192204637, 'od_type': 'Iter', 'od_wait': 30, 'border_count': 240}. Best is trial 0 with value: 0.5559356190988981.
[I 2025-10-24 07:13:33,769] Trial 2 finished with value: 0.47818540614767213 and parameters: {'iterations': 653, 'learning_rate': 0.008785129315676316, 'depth': 6,

Q1 최적 파라미터: {'iterations': 950, 'learning_rate': 1.0725000291013526e-05, 'depth': 8, 'l2_leaf_reg': 97.05448563128365, 'bootstrap_type': 'Bayesian', 'random_strength': 0.07154403059197816, 'bagging_temperature': 7.5714789101390405, 'od_type': 'Iter', 'od_wait': 36, 'border_count': 143}
Q1 최고 F1 점수: 0.6250

=== target: Q2 ===


[I 2025-10-24 08:59:19,249] Trial 0 finished with value: 0.6520728745995943 and parameters: {'iterations': 782, 'learning_rate': 0.05095837022060957, 'depth': 7, 'l2_leaf_reg': 0.069687568229395, 'bootstrap_type': 'Bayesian', 'random_strength': 0.05753928328730566, 'bagging_temperature': 3.608429307417522, 'od_type': 'IncToDec', 'od_wait': 21, 'border_count': 163}. Best is trial 0 with value: 0.6520728745995943.
[I 2025-10-24 09:02:38,985] Trial 1 finished with value: 0.6623907394283014 and parameters: {'iterations': 868, 'learning_rate': 0.0004635721724001282, 'depth': 8, 'l2_leaf_reg': 0.10802866790742956, 'bootstrap_type': 'Bayesian', 'random_strength': 0.17456520174243242, 'bagging_temperature': 6.504045086257535, 'od_type': 'IncToDec', 'od_wait': 39, 'border_count': 134}. Best is trial 1 with value: 0.6623907394283014.
[I 2025-10-24 09:05:46,366] Trial 2 finished with value: 0.639846107262888 and parameters: {'iterations': 791, 'learning_rate': 4.141739583105101e-05, 'depth': 8, '

Q2 최적 파라미터: {'iterations': 331, 'learning_rate': 0.09849041304372688, 'depth': 7, 'l2_leaf_reg': 1.7963209336696702, 'bootstrap_type': 'Bayesian', 'random_strength': 3.2885026492443354, 'bagging_temperature': 7.8418144486768995, 'od_type': 'IncToDec', 'od_wait': 26, 'border_count': 217}
Q2 최고 F1 점수: 0.6818

=== target: Q3 ===


[I 2025-10-24 09:56:30,419] Trial 0 finished with value: 0.6631151489510152 and parameters: {'iterations': 461, 'learning_rate': 1.3914935305865043e-05, 'depth': 8, 'l2_leaf_reg': 0.005204426712052842, 'bootstrap_type': 'Bayesian', 'random_strength': 0.07044622951188226, 'bagging_temperature': 8.24013050995456, 'od_type': 'IncToDec', 'od_wait': 37, 'border_count': 93}. Best is trial 0 with value: 0.6631151489510152.
[I 2025-10-24 09:58:39,028] Trial 1 finished with value: 0.6514695696527382 and parameters: {'iterations': 557, 'learning_rate': 0.003969882703358422, 'depth': 8, 'l2_leaf_reg': 2.979505097275985, 'bootstrap_type': 'Bayesian', 'random_strength': 0.05673517994601318, 'bagging_temperature': 7.268323053073768, 'od_type': 'Iter', 'od_wait': 34, 'border_count': 201}. Best is trial 0 with value: 0.6631151489510152.
[I 2025-10-24 09:59:00,742] Trial 2 finished with value: 0.5730310997019857 and parameters: {'iterations': 375, 'learning_rate': 1.6922695335378307e-05, 'depth': 9, 'l

In [None]:
def smote_optuna_multi(split_date):
  for col in target_multi:
    print(f'=== target: {col} ===')
    x_train, x_val, y_train, y_val = split_data[col]

    def objective_multi(trial):
      params = {'learning_rate': trial.suggest_float("lr", 1e-5, 1e-1, log=True),
                'n_estimators': trial.suggest_int('n_estimators', 50, 300),
                'random_state': seed,
                'verbose': 0,
                'loss_function': 'MultiClass'
                }
      feat = features[col]

      clf_binary = CatBoostClassifier(**params, task_type="GPU")
      pipeline = imbalanced_make_pipeline(SMOTE(sampling_strategy='not majority', random_state=seed), clf_binary)

      cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
      score_macro = cross_val_score(pipeline, x_train, y_train, cv=cv, scoring='f1_macro').mean()

      trial.report(score_macro, step=0)
      if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

      return score_macro

    study = optuna.create_study(direction="maximize")
    study.optimize(objective_multi, n_trials=50)

    best_params = study.best_params
    best_score = study.best_value
    catb_best_param_dict[col] = best_params
    print(f"{col} 최적 파라미터: {best_params}")
    print(f"{col} 최고 F1 점수: {best_score:.4f}")
    print("=" * 50, "\n")

In [None]:
smote_optuna_multi(split_data)

In [None]:
fix_param_binary = {'random_state': seed,
                    'n_jobs': -1,
                    'verbose': False
                    }

fix_param_multi = {'random_sate': seed,
                   'n_jobs': -1,
                   'verbose': False,
                   'loss_function': 'MultiClass'
                   }

In [None]:
def catb_smote(split_data, xgb_best_param_dict, test_x):
  preds = {}

  for col_binary in targets_binary:
    print(col_binary)
    x_train, x_val, y_train, y_val = split_data[col_binary]
    best_param = catb_best_param_dict[col_binary]
    feat = features[col_binary]

    clf_binary = CatBoostClassifier(**best_param, **fix_param_binary, task_type="GPU")

    smote = SMOTE(sampling_strategy='minority', random_state=seed)
    x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

    clf_binary.fit(
        x_train_over, y_train_over,
        eval_set=(x_val[feat], y_val),
        early_stopping_rounds=10,
        verbose=False,
        eval_metric='Logloss'
        )

    y_pred = clf_binary.predict(x_val[feat])

    f1 = f1_score(y_val, y_pred)
    train_acc = clf_binary.score(x_train[feat], y_train)
    val_acc = clf_binary.score(x_val[feat], y_val)
    print(f'{col_binary} -> Train Acc: {train_acc:4f} | Validation Accucary: {val_acc:.4f} | Validation F1_score: {f1:.4f}')
    print()

    preds[col_binary] = clf_binary.predict(test_x[feat])

  for col_multi in target_multi:
    print(col_multi)
    x_train, x_val, y_train, y_val = split_data[col_multi]
    best_param = catb_best_param_dict[col_multi]
    feat = features[col_multi]

    clf_multi = CatBoostClassifier(**best_param, **fix_param_multi, task_type="GPU")

    smote = SMOTE(sampling_strategy='not majority', random_state=seed)
    x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

    clf_multi.fit(
        x_train_over, y_train_over
        eval_set=(x_val[feat], y_val),
        early_stopping_rounds=10,
        verbose=False,
        eval_metric='MultiClass'
    )

    y_pred = clf_multi.predict(x_val[feat])

    f1 = f1_score(y_val, y_pred, average='macro')
    train_acc = clf_multi.score(x_train[feat], y_train)
    val_acc = clf_multi.score(x_val[feat], y_val)
    print(f'{col_multi} -> Train Acc: {train_acc:4f} | Validation Accucary: {val_acc:.4f} | Validation F1_score: {f1:.4f}')
    print()

    preds[col_multi] = clf_multi.predict(test_x[feat])

  return preds

In [None]:
catb_best_param_dict

In [None]:
preds = catb_smote(split_data, catb_best_param_dict, test_x)

# voting

In [None]:
def prediction(models, test, mode=None, weights=None):
    #hard voting일 경우
    if mode == "hard":
        preds = np.asarray([x.predict(test).reshape(-1) for x in models]).T
        res = np.apply_along_axis(
            lambda x: np.argmax(np.bincount(x, weights=weights)),
            axis=1,
            arr=preds
        )
    #soft voting일 경우
    elif mode == "soft":
        preds = np.asarray([x.predict_proba(test) for x in models])
        res = np.zeros(preds[0].shape)
        for pred, weight in zip(preds, weights):
            res = res + pred*weight
        res = np.argmax(preds, axis=0)
    else:
        res = models[0].predict(test)
    return res

#hard voting을 선택하여 모델 중요도별 가중치 제공.
preds = prediction(models, test_x, 'hard', [2,5,4] )

In [None]:
def voting(split_data,
           lgbm_best_param_dict, xgb_best_param_dict, catb_best_param_dict,
           test_x):

    preds = {}

    # 이진 분류
    for col_binary in targets_binary:
        x_train, x_val, y_train, y_val = split_data[col_binary]
        feat = features[col_binary]

        lgbm = LGBMClassifier(**lgbm_best_param_dict[col_binary],
                              random_state = seed,
                              n_jobs = -1,
                              verbosity = -1)
        xgb = XGBClassifier(**xgb_best_param_dict[col_binary],
                            random_state = seed,
                            n_jobs = -1,
                            verbosity = 0)
        catb = CatBoostClassifier(**catb_best_param_dict[col_binary],
                                  random_state = seed,
                                  n_jobs=-1,
                                  verbose=False)

        smote = SMOTE(sampling_strategy='minority', random_state=seed)
        x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

        voting_binary = VotingClassifier(
            estimators=[('lgbm', lgbm), ('xgb', xgb), ('catb', catb)],
            voting='hard'
        )

        voting_binary.fit(x_train_over, y_train_over)
        y_pred = voting_binary.predict(x_val[feat])

        f1 = f1_score(y_val, y_pred)
        train_acc = voting_binary.score(x_train[feat], y_train)
        val_acc = voting_binary.score(x_val[feat], y_val)

        print(f'Binary {col_binary} -> Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | Val F1: {f1:.4f}')
        preds[col_binary] = voting_binary.predict(test_x[feat])

    # 다중 분류
    for col_multi in target_multi:
        x_train, x_val, y_train, y_val = split_data[col_multi]
        feat = features[col_multi]

        model1 = LGBMClassifier(**lgbm_param[col_multi],
                                random_sate = seed,
                                n_jobs =-1,
                                verbosity = -1,
                                objective = 'multiclass',
                                num_class = 3)
        model2 = XGBClassifier(**xgb_param[col_multi],
                               random_sate = seed,
                               n_jobs =-1,
                               verbosity = 0,
                               objective = 'multiclass',
                               num_class = 3)
        model3 = CatBoostClassifier(**catb_param,
                                    random_sate = seed, n_jobs = -1,
                                    verbose = False,
                                    loss_function = 'MultiClass')

        smote = SMOTE(sampling_strategy='not majority', random_state=seed)
        x_train_over, y_train_over = smote.fit_resample(x_train[feat], y_train)

        voting_multi = VotingClassifier(
            estimators=[('lgbm', model1), ('xgb', model2), ('catb', model3)],
            voting='soft'
        )

        voting_multi.fit(x_train_over, y_train_over)
        y_pred = voting_multi.predict(x_val[feat])

        f1 = f1_score(y_val, y_pred, average='macro')
        train_acc = voting_multi.score(x_train[feat], y_train)
        val_acc = voting_multi.score(x_val[feat], y_val)

        print(f'Multi {col_multi} -> Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | Val F1: {f1:.4f}')
        preds[col_multi] = voting_multi.predict(test_x[feat])

    return preds

In [None]:
preds = voting(split_data, lgbm_best_param_dict, xgb_best_param_dict, catb_best_param_dict, test_x)

# stacking

# 결과저장

In [None]:
final = sample_submission[['subject_id','sleep_date','lifelog_date']].copy()
final['lifelog_date'] = pd.to_datetime(final['lifelog_date']).dt.date

final['id'] = final['subject_id'] + '_' + final['lifelog_date'].astype(str)
assert len(final) == len(preds['S1'])

target_list = ['Q1', 'Q2', 'Q3', 'S1','S2','S3']
for col in target_list:
  final[col] = preds[col].astype(int)

submission_final = final[['subject_id','sleep_date','lifelog_date', 'Q1','Q2','Q3','S1','S2','S3']]
submission_final.to_csv('submission_final.csv',index=False)

from google.colab import files
files.download("submission_final.csv")