## 1. 데이터 불러오기

In [1]:
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [2]:
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

"""
[train.csv]                                         [test.csv]
ID : 샘플별 고유 ID                                 ID : 샘플별 고유 ID
gender : 성별                                       gender : 성별
age : 연령                                          age : 연령
height : 키(cm)                                     height : 키(cm)
weight : 몸무게(kg)                                 weight : 몸무게(kg)
cholesterol : 콜레스테롤 수치                       cholesterol : 콜레스테롤 수치
systolic_blood_pressure : 수축기 혈압               systolic_blood_pressure : 수축기 혈압
diastolic_blood_pressure : 이완기 혈압              diastolic_blood_pressure : 이완기 혈압
glucose : 혈당 수치(mg/dL)                          glucose : 혈당 수치(mg/dL)
bone_density : 골밀도(g/cm²)                        bone_density : 골밀도(g/cm²)
activity : 생활시 운동 강도                         activity : 생활시 운동 강도
smoke_status : 흡연 상태                            smoke_status : 흡연 상태
medical_history : 만성질환                          medical_history : 만성질환
family_medical_history : 가족력                     family_medical_history : 가족력
sleep_pattern : 수면패턴                            sleep_pattern : 수면패턴
edu_level : 학력                                    edu_level : 학력
mean_working : 1주일당 평균 근로 시간               mean_working : 1주일당 평균 근로 시간
stress_score : (TARGET) 스트레스 점수                    
"""

display(train.shape)

(3000, 18)

## 2. 데이터 전처리

In [3]:
# 카테고리형 변수의 결측값을 최빈값으로 대체
# 최빈값 -> 가장 많이 관측된 값
categorical_na_cols = train.select_dtypes(include=['object', 'category']).columns.drop('ID')

for col in categorical_na_cols:
    # # 학습 데이터에서 최빈값 계산
    # most_frequent = train[col].mode()[0]
    # print(f"{col}: {most_frequent}")
    # # 학습 데이터와 테스트 데이터 모두 해당 칼럼의 최빈값으로 대체
    # train[col] = train[col].fillna(most_frequent)
    # test[col] = test[col].fillna(most_frequent)

    train[col] = train[col].fillna('None')
    test[col] = test[col].fillna('None')

    print(f"{col}: {train[col].unique()}")

gender: ['F' 'M']
activity: ['moderate' 'light' 'intense']
smoke_status: ['ex-smoker' 'non-smoker' 'current-smoker']
medical_history: ['high blood pressure' 'None' 'diabetes' 'heart disease']
family_medical_history: ['diabetes' 'None' 'high blood pressure' 'heart disease']
sleep_pattern: ['sleep difficulty' 'normal' 'oversleeping']
edu_level: ['bachelors degree' 'graduate degree' 'high school diploma' 'None']


In [4]:
# mean_working에 대해 중앙값 대체
# median_value = train['mean_working'].median()

# mean_working 결측치 -> 0
# train['mean_working'] = train['mean_working'].fillna(0)
# test['mean_working'] = test['mean_working'].fillna(0)

# mean_working 결측치를 평균값으로 대체
mean_value = train['mean_working'].mean()
train['mean_working'] = train['mean_working'].fillna(mean_value)
test['mean_working'] = test['mean_working'].fillna(mean_value)

display(train.columns)

Index(['ID', 'gender', 'age', 'height', 'weight', 'cholesterol',
       'systolic_blood_pressure', 'diastolic_blood_pressure', 'glucose',
       'bone_density', 'activity', 'smoke_status', 'medical_history',
       'family_medical_history', 'sleep_pattern', 'edu_level', 'mean_working',
       'stress_score'],
      dtype='object')

In [5]:
# mean_working이 0이면 0, 아니면 1로 is_working 컬럼 추가
train['is_working'] = (train['mean_working'] != 0).astype(int)
test['is_working'] = (test['mean_working'] != 0).astype(int)

In [6]:
# 1. 컬럼 목록에서 is_working이 있는지 확인
print('is_working' in train.columns)  # True면 정상 추가

# 2. train 데이터프레임의 상위 5개 행에서 is_working 컬럼 확인
print(train[['mean_working', 'is_working']].head())

# 3. is_working 컬럼의 값 분포(0/1 개수) 확인
print(train['is_working'].value_counts())

True
   mean_working  is_working
0      8.716972           1
1      8.716972           1
2      9.000000           1
3      8.716972           1
4      8.716972           1
is_working
1    3000
Name: count, dtype: int64


In [7]:
# One-Hot Encoding 적용 열 - gender, smoke_status, medical_history, family_medical_history
# 원핫 인코딩 => 카테고리끼리 순서 없을때 사용
# smoke_status는 좀 애매하긴 한듯
def one_hot_encoding(df):
    one_hot_encoding_cols = ['gender', 'smoke_status', 'medical_history', 'family_medical_history']
    encoding_df = pd.get_dummies(df, columns=one_hot_encoding_cols)
    return encoding_df

train = one_hot_encoding(train)
test = one_hot_encoding(test)

display(train.columns)
display(train.shape)

Index(['ID', 'age', 'height', 'weight', 'cholesterol',
       'systolic_blood_pressure', 'diastolic_blood_pressure', 'glucose',
       'bone_density', 'activity', 'sleep_pattern', 'edu_level',
       'mean_working', 'stress_score', 'is_working', 'gender_F', 'gender_M',
       'smoke_status_current-smoker', 'smoke_status_ex-smoker',
       'smoke_status_non-smoker', 'medical_history_None',
       'medical_history_diabetes', 'medical_history_heart disease',
       'medical_history_high blood pressure', 'family_medical_history_None',
       'family_medical_history_diabetes',
       'family_medical_history_heart disease',
       'family_medical_history_high blood pressure'],
      dtype='object')

(3000, 28)

In [8]:
# Label Encoding 적용 열 - activity, sleep_pattern, edu_level
# 라벨 인코딩 => 카테고리끼리 순서 있을때 사용
def label_encoding(df):
    label_encoding_cols = ['activity', 'sleep_pattern', 'edu_level']
    for feature in label_encoding_cols:
        le = LabelEncoder()
        le.fit(df[feature])
        df[feature] = le.transform(df[feature])
    return df

train = label_encoding(train)
test = label_encoding(test)

In [9]:
# 모든 bool 컬럼을 int로 변환
for col in train.columns:
    if train[col].dtype == 'bool':
        train[col] = train[col].astype(int)
for col in test.columns:
    if test[col].dtype == 'bool':
        test[col] = test[col].astype(int)

In [10]:
train

Unnamed: 0,ID,age,height,weight,cholesterol,systolic_blood_pressure,diastolic_blood_pressure,glucose,bone_density,activity,...,smoke_status_ex-smoker,smoke_status_non-smoker,medical_history_None,medical_history_diabetes,medical_history_heart disease,medical_history_high blood pressure,family_medical_history_None,family_medical_history_diabetes,family_medical_history_heart disease,family_medical_history_high blood pressure
0,TRAIN_0000,72,161.49,58.47,279.84,165,100,143.35,0.87,2,...,1,0,0,0,0,1,0,1,0,0
1,TRAIN_0001,88,179.87,77.60,257.37,178,111,146.94,0.07,2,...,1,0,1,0,0,0,0,1,0,0
2,TRAIN_0002,47,182.47,89.93,226.66,134,95,142.61,1.18,1,...,1,0,1,0,0,0,1,0,0,0
3,TRAIN_0003,69,185.78,68.63,206.74,158,92,137.26,0.48,0,...,1,0,0,0,0,1,1,0,0,0
4,TRAIN_0004,81,164.63,71.53,255.92,171,116,129.37,0.34,2,...,1,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,TRAIN_2995,87,174.43,77.14,233.31,176,111,134.71,0.20,1,...,0,1,1,0,0,0,1,0,0,0
2996,TRAIN_2996,25,171.44,51.67,196.89,143,86,111.91,1.75,2,...,0,0,1,0,0,0,1,0,0,0
2997,TRAIN_2997,54,162.13,55.54,210.52,164,91,136.47,0.80,1,...,0,0,1,0,0,0,1,0,0,0
2998,TRAIN_2998,70,181.32,82.20,234.10,182,91,144.89,0.58,2,...,0,1,1,0,0,0,1,0,0,0


## 3. 피처 엔지니어링

In [11]:
# BMI => 몸무게[kg] / 키[cm]^2
# 혈압차(맥압) => 수축성 혈압 - 이완성 혈압
def add_features(df):
    df['BMI'] = df['weight'] / ((df['height']/100)**2)
    df['bp_diff'] = df['systolic_blood_pressure'] - df['diastolic_blood_pressure']
    return df

train = add_features(train)
test = add_features(test)

In [12]:
display(train['mean_working'].describe())

# working_group 피처 만들기
bins = [3, 8, 9, 16]  # 적절한 경계값 설정
labels = [0, 1, 2]     # 저, 중하, 중상, 고 근무

train['working_group'] = pd.cut(train['mean_working'], bins=bins, labels=labels, include_lowest=True)
test['working_group'] = pd.cut(test['mean_working'], bins=bins, labels=labels, include_lowest=True)

overwork_threshold = train['mean_working'].quantile(0.95)  # 상위 5% 기준값
train['is_overwork'] = (train['mean_working'] >= overwork_threshold).astype(int)
test['is_overwork'] = (test['mean_working'] >= overwork_threshold).astype(int)

display(train['working_group'].value_counts())
display(train['is_overwork'].value_counts())

count    3000.000000
mean        8.716972
std         1.319229
min         4.000000
25%         8.000000
50%         8.716972
75%         9.000000
max        16.000000
Name: mean_working, dtype: float64

working_group
1    1569
0     888
2     543
Name: count, dtype: int64

is_overwork
0    2803
1     197
Name: count, dtype: int64

In [13]:
# mean_working & sleep_pattern 피처 엔지니어링

train['work_sleep_imbalance'] = train['mean_working'] / (train['sleep_pattern'] + 1)
test['work_sleep_imbalance'] = test['mean_working'] / (test['sleep_pattern'] + 1)

display(train[['work_sleep_imbalance', 'stress_score']])

for col in train.columns:
    if col not in ['ID', 'stress_score']:
        corr_val = train['stress_score'].corr(train[col])
        print(f"{col} ↔ stress_score 상관계수: {corr_val:.4f}")

Unnamed: 0,work_sleep_imbalance,stress_score
0,2.905657,0.63
1,8.716972,0.83
2,9.000000,0.70
3,4.358486,0.17
4,2.905657,0.36
...,...,...
2995,4.358486,0.02
2996,9.000000,0.16
2997,9.000000,0.16
2998,4.358486,0.18


age ↔ stress_score 상관계수: 0.0187
height ↔ stress_score 상관계수: -0.0057
weight ↔ stress_score 상관계수: 0.0113
cholesterol ↔ stress_score 상관계수: 0.0213
systolic_blood_pressure ↔ stress_score 상관계수: 0.0156
diastolic_blood_pressure ↔ stress_score 상관계수: 0.0254
glucose ↔ stress_score 상관계수: -0.0061
bone_density ↔ stress_score 상관계수: -0.0226
activity ↔ stress_score 상관계수: 0.0070
sleep_pattern ↔ stress_score 상관계수: 0.0002
edu_level ↔ stress_score 상관계수: -0.0271
mean_working ↔ stress_score 상관계수: 0.1507
is_working ↔ stress_score 상관계수: nan
gender_F ↔ stress_score 상관계수: 0.0136
gender_M ↔ stress_score 상관계수: -0.0136
smoke_status_current-smoker ↔ stress_score 상관계수: 0.0261
smoke_status_ex-smoker ↔ stress_score 상관계수: -0.0320
smoke_status_non-smoker ↔ stress_score 상관계수: 0.0087
medical_history_None ↔ stress_score 상관계수: -0.0504
medical_history_diabetes ↔ stress_score 상관계수: 0.0245
medical_history_heart disease ↔ stress_score 상관계수: 0.0000
medical_history_high blood pressure ↔ stress_score 상관계수: 0.0373
family_medical_his

  c /= stddev[:, None]
  c /= stddev[None, :]


## 4. 모델링

In [14]:
from sklearn.model_selection import train_test_split

X = train.drop(['ID', 'stress_score'], axis=1)
y = train['stress_score']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
# x_train = train.drop(['ID', 'stress_score'], axis = 1)
# x_val = train['stress_score']
# y_train = train['stress_score']

test = test.drop('ID', axis = 1)

In [16]:
# lgbm = LGBMRegressor(random_state = 42)
# lgbm.fit(x_train, y_train)

# pred = lgbm.predict(test)

In [22]:
# category 타입 컬럼을 int로 변환
for col in X_train.columns:
    if str(X_train[col].dtype) == 'category':
        X_train[col] = X_train[col].astype(int)
        X_val[col] = X_val[col].astype(int)

# train, test의 category 타입 컬럼을 int로 변환
for col in train.columns:
    if str(train[col].dtype) == 'category':
        train[col] = train[col].astype(int)
for col in test.columns:
    if str(test[col].dtype) == 'category':
        test[col] = test[col].astype(int)

In [20]:
import optuna
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

def objective(trial):
    # 하이퍼파라미터 샘플링
    lgbm_params = {
        'n_estimators': trial.suggest_int('lgbm_n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('lgbm_learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('lgbm_num_leaves', 20, 100),
        'random_state': 42
    }
    xgb_params = {
        'n_estimators': trial.suggest_int('xgb_n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('xgb_max_depth', 3, 10),
        'random_state': 42
    }
    cat_params = {
        'iterations': trial.suggest_int('cat_iterations', 100, 1000),
        'learning_rate': trial.suggest_float('cat_learning_rate', 0.01, 0.3),
        'depth': trial.suggest_int('cat_depth', 3, 10),
        'random_seed': 42,
        'verbose': 0
    }

    # 모델 학습
    lgbm = LGBMRegressor(**lgbm_params)
    xgb = XGBRegressor(**xgb_params)
    cat = CatBoostRegressor(**cat_params)

    lgbm.fit(X_train, y_train)
    xgb.fit(X_train, y_train)
    cat.fit(X_train, y_train)

    # 예측
    pred_lgbm = lgbm.predict(X_val)
    pred_xgb = xgb.predict(X_val)
    pred_cat = cat.predict(X_val)

    # 앙상블 가중치 샘플링 및 정규화
    w_lgbm = trial.suggest_float('w_lgbm', 0, 1)
    w_xgb = trial.suggest_float('w_xgb', 0, 1)
    w_cat = trial.suggest_float('w_cat', 0, 1)
    total = w_lgbm + w_xgb + w_cat
    w_lgbm /= total
    w_xgb /= total
    w_cat /= total

    # 앙상블 예측
    pred_ensemble = w_lgbm * pred_lgbm + w_xgb * pred_xgb + w_cat * pred_cat

    # MAE 계산
    mae = mean_absolute_error(y_val, pred_ensemble)
    return mae

# Optuna Study 생성 및 최적화
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

print('Best trial:')
print(study.best_trial)
print('Best params:', study.best_trial.params)

[I 2025-07-23 21:38:38,724] A new study created in memory with name: no-name-144f2957-35ae-4802-8dfb-ff8bf5d94474


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:38:42,487] Trial 0 finished with value: 0.17035710002588067 and parameters: {'lgbm_n_estimators': 768, 'lgbm_learning_rate': 0.14383738246051886, 'lgbm_num_leaves': 35, 'xgb_n_estimators': 262, 'xgb_learning_rate': 0.17252536267726132, 'xgb_max_depth': 6, 'cat_iterations': 280, 'cat_learning_rate': 0.1852669773030281, 'cat_depth': 4, 'w_lgbm': 0.35932908587518664, 'w_xgb': 0.09281674174145804, 'w_cat': 0.13966402818998314}. Best is trial 0 with value: 0.17035710002588067.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:38:49,572] Trial 1 finished with value: 0.1704521807143673 and parameters: {'lgbm_n_estimators': 643, 'lgbm_learning_rate': 0.2828283732444564, 'lgbm_num_leaves': 86, 'xgb_n_estimators': 323, 'xgb_learning_rate': 0.20647194574258024, 'xgb_max_depth': 6, 'cat_iterations': 887, 'cat_learning_rate': 0.21056733052064655, 'cat_depth': 5, 'w_lgbm': 0.39690398214810463, 'w_xgb': 0.044214199149247846, 'w_cat': 0.5331644888418493}. Best is trial 0 with value: 0.17035710002588067.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000389 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:39:00,216] Trial 2 finished with value: 0.16293049742907115 and parameters: {'lgbm_n_estimators': 494, 'lgbm_learning_rate': 0.26945781356459925, 'lgbm_num_leaves': 44, 'xgb_n_estimators': 555, 'xgb_learning_rate': 0.16985711743214008, 'xgb_max_depth': 8, 'cat_iterations': 872, 'cat_learning_rate': 0.22002245431315798, 'cat_depth': 9, 'w_lgbm': 0.8489276922513466, 'w_xgb': 0.8173187061941226, 'w_cat': 0.5782943435580077}. Best is trial 2 with value: 0.16293049742907115.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:39:26,326] Trial 3 finished with value: 0.16112786264854792 and parameters: {'lgbm_n_estimators': 397, 'lgbm_learning_rate': 0.29465550507377225, 'lgbm_num_leaves': 96, 'xgb_n_estimators': 731, 'xgb_learning_rate': 0.05133286316453557, 'xgb_max_depth': 10, 'cat_iterations': 853, 'cat_learning_rate': 0.22043007182733007, 'cat_depth': 10, 'w_lgbm': 0.7054886286888137, 'w_xgb': 0.6988401367315369, 'w_cat': 0.7579940979249365}. Best is trial 3 with value: 0.16112786264854792.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000325 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:39:41,321] Trial 4 finished with value: 0.1584136727393995 and parameters: {'lgbm_n_estimators': 935, 'lgbm_learning_rate': 0.21775723125516702, 'lgbm_num_leaves': 57, 'xgb_n_estimators': 345, 'xgb_learning_rate': 0.20895485336686878, 'xgb_max_depth': 9, 'cat_iterations': 514, 'cat_learning_rate': 0.20141083639172153, 'cat_depth': 10, 'w_lgbm': 0.05378419929288647, 'w_xgb': 0.037982059732657336, 'w_cat': 0.3980228727352787}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005241 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:39:46,160] Trial 5 finished with value: 0.16679003022432345 and parameters: {'lgbm_n_estimators': 233, 'lgbm_learning_rate': 0.09203736811453864, 'lgbm_num_leaves': 61, 'xgb_n_estimators': 558, 'xgb_learning_rate': 0.2883314980106619, 'xgb_max_depth': 6, 'cat_iterations': 486, 'cat_learning_rate': 0.1755383019017627, 'cat_depth': 7, 'w_lgbm': 0.459475817786013, 'w_xgb': 0.2174432531948136, 'w_cat': 0.9140265014959328}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000351 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:39:48,501] Trial 6 finished with value: 0.18977115502480557 and parameters: {'lgbm_n_estimators': 225, 'lgbm_learning_rate': 0.06755419865430307, 'lgbm_num_leaves': 41, 'xgb_n_estimators': 699, 'xgb_learning_rate': 0.21412131563642314, 'xgb_max_depth': 6, 'cat_iterations': 834, 'cat_learning_rate': 0.29298493624916694, 'cat_depth': 3, 'w_lgbm': 0.2072190357007785, 'w_xgb': 0.1399487126995147, 'w_cat': 0.9266225775720662}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:39:51,136] Trial 7 finished with value: 0.19061925667228047 and parameters: {'lgbm_n_estimators': 714, 'lgbm_learning_rate': 0.03502185571708426, 'lgbm_num_leaves': 75, 'xgb_n_estimators': 496, 'xgb_learning_rate': 0.27381254392474236, 'xgb_max_depth': 4, 'cat_iterations': 278, 'cat_learning_rate': 0.13279862727117672, 'cat_depth': 5, 'w_lgbm': 0.08894794066718892, 'w_xgb': 0.9324178287566403, 'w_cat': 0.7579531200383539}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001618 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:39:52,581] Trial 8 finished with value: 0.18116490496843288 and parameters: {'lgbm_n_estimators': 516, 'lgbm_learning_rate': 0.04442699588691939, 'lgbm_num_leaves': 50, 'xgb_n_estimators': 200, 'xgb_learning_rate': 0.2782011441383048, 'xgb_max_depth': 4, 'cat_iterations': 412, 'cat_learning_rate': 0.10188267513857491, 'cat_depth': 3, 'w_lgbm': 0.8915772553264253, 'w_xgb': 0.9352480399655566, 'w_cat': 0.041844851473603506}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000272 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:40:04,578] Trial 9 finished with value: 0.16344314701318174 and parameters: {'lgbm_n_estimators': 333, 'lgbm_learning_rate': 0.2417699782517843, 'lgbm_num_leaves': 94, 'xgb_n_estimators': 868, 'xgb_learning_rate': 0.22172141884463814, 'xgb_max_depth': 7, 'cat_iterations': 996, 'cat_learning_rate': 0.09395865080045669, 'cat_depth': 9, 'w_lgbm': 0.21500821059389108, 'w_xgb': 0.6468696009907464, 'w_cat': 0.7248857646180694}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:40:09,929] Trial 10 finished with value: 0.187827074102227 and parameters: {'lgbm_n_estimators': 990, 'lgbm_learning_rate': 0.20324574223625183, 'lgbm_num_leaves': 27, 'xgb_n_estimators': 360, 'xgb_learning_rate': 0.04574186620594517, 'xgb_max_depth': 10, 'cat_iterations': 633, 'cat_learning_rate': 0.018869253334536207, 'cat_depth': 7, 'w_lgbm': 0.007153410547828948, 'w_xgb': 0.4052534567792919, 'w_cat': 0.3164386920474104}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:40:30,654] Trial 11 finished with value: 0.16249702520207723 and parameters: {'lgbm_n_estimators': 1000, 'lgbm_learning_rate': 0.21704679966410426, 'lgbm_num_leaves': 64, 'xgb_n_estimators': 889, 'xgb_learning_rate': 0.0699748525118245, 'xgb_max_depth': 10, 'cat_iterations': 591, 'cat_learning_rate': 0.2666166725442083, 'cat_depth': 10, 'w_lgbm': 0.6778137051763456, 'w_xgb': 0.5135352599796762, 'w_cat': 0.3469486789789488}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000947 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:40:51,527] Trial 12 finished with value: 0.16202290887024476 and parameters: {'lgbm_n_estimators': 392, 'lgbm_learning_rate': 0.29946419235944033, 'lgbm_num_leaves': 73, 'xgb_n_estimators': 742, 'xgb_learning_rate': 0.10195681070980694, 'xgb_max_depth': 9, 'cat_iterations': 704, 'cat_learning_rate': 0.24528008223869469, 'cat_depth': 10, 'w_lgbm': 0.6312892460354579, 'w_xgb': 0.6889912361832082, 'w_cat': 0.40920952873300104}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001300 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:40:55,014] Trial 13 finished with value: 0.176909701867879 and parameters: {'lgbm_n_estimators': 847, 'lgbm_learning_rate': 0.17850791531214108, 'lgbm_num_leaves': 100, 'xgb_n_estimators': 110, 'xgb_learning_rate': 0.1327292286785973, 'xgb_max_depth': 9, 'cat_iterations': 130, 'cat_learning_rate': 0.14410179635365583, 'cat_depth': 8, 'w_lgbm': 0.70042726930352, 'w_xgb': 0.3514481731347889, 'w_cat': 0.7091994758969822}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:41:03,211] Trial 14 finished with value: 0.16562966000293522 and parameters: {'lgbm_n_estimators': 101, 'lgbm_learning_rate': 0.14404820003004537, 'lgbm_num_leaves': 79, 'xgb_n_estimators': 427, 'xgb_learning_rate': 0.10827730126507237, 'xgb_max_depth': 8, 'cat_iterations': 713, 'cat_learning_rate': 0.2352256115178413, 'cat_depth': 9, 'w_lgbm': 0.592815679734208, 'w_xgb': 0.5838617864448115, 'w_cat': 0.22322973323097195}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:41:17,754] Trial 15 finished with value: 0.1670798339029159 and parameters: {'lgbm_n_estimators': 599, 'lgbm_learning_rate': 0.2491027589148055, 'lgbm_num_leaves': 56, 'xgb_n_estimators': 695, 'xgb_learning_rate': 0.011927635214560604, 'xgb_max_depth': 10, 'cat_iterations': 438, 'cat_learning_rate': 0.19390634770142212, 'cat_depth': 10, 'w_lgbm': 0.9985123077369021, 'w_xgb': 0.7991783411911871, 'w_cat': 0.6279077891199303}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000332 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:41:25,370] Trial 16 finished with value: 0.1631411354642261 and parameters: {'lgbm_n_estimators': 860, 'lgbm_learning_rate': 0.21833934238691327, 'lgbm_num_leaves': 20, 'xgb_n_estimators': 796, 'xgb_learning_rate': 0.23177458122682426, 'xgb_max_depth': 8, 'cat_iterations': 735, 'cat_learning_rate': 0.2915282192165925, 'cat_depth': 8, 'w_lgbm': 0.7960008864533136, 'w_xgb': 0.3201899126400367, 'w_cat': 0.4211858180041325}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000241 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:41:34,376] Trial 17 finished with value: 0.17023523974229182 and parameters: {'lgbm_n_estimators': 424, 'lgbm_learning_rate': 0.17382038922930845, 'lgbm_num_leaves': 70, 'xgb_n_estimators': 600, 'xgb_learning_rate': 0.010626151511222953, 'xgb_max_depth': 9, 'cat_iterations': 996, 'cat_learning_rate': 0.16335378482891427, 'cat_depth': 8, 'w_lgbm': 0.5234713033845014, 'w_xgb': 0.45496402612785963, 'w_cat': 0.7949163414984997}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:41:38,253] Trial 18 finished with value: 0.18974544222923717 and parameters: {'lgbm_n_estimators': 276, 'lgbm_learning_rate': 0.252102372570247, 'lgbm_num_leaves': 85, 'xgb_n_estimators': 962, 'xgb_learning_rate': 0.14469929397486703, 'xgb_max_depth': 9, 'cat_iterations': 511, 'cat_learning_rate': 0.016604477558890524, 'cat_depth': 6, 'w_lgbm': 0.27840446199105146, 'w_xgb': 0.25378365788788926, 'w_cat': 0.4889640340387829}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000465 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:41:47,977] Trial 19 finished with value: 0.1759718813249288 and parameters: {'lgbm_n_estimators': 886, 'lgbm_learning_rate': 0.1160000404913269, 'lgbm_num_leaves': 54, 'xgb_n_estimators': 422, 'xgb_learning_rate': 0.07580734648096488, 'xgb_max_depth': 3, 'cat_iterations': 311, 'cat_learning_rate': 0.25162526515013356, 'cat_depth': 10, 'w_lgbm': 0.7424398548956037, 'w_xgb': 0.7563429861722261, 'w_cat': 0.8534425722896912}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002807 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:41:59,982] Trial 20 finished with value: 0.16084783741667097 and parameters: {'lgbm_n_estimators': 715, 'lgbm_learning_rate': 0.29852366890567894, 'lgbm_num_leaves': 66, 'xgb_n_estimators': 632, 'xgb_learning_rate': 0.18456814842143698, 'xgb_max_depth': 7, 'cat_iterations': 793, 'cat_learning_rate': 0.1154470519926995, 'cat_depth': 9, 'w_lgbm': 0.541959784125296, 'w_xgb': 0.5489397438965874, 'w_cat': 0.6357135094043999}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000313 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:09,676] Trial 21 finished with value: 0.1608754524420898 and parameters: {'lgbm_n_estimators': 701, 'lgbm_learning_rate': 0.2961485040653786, 'lgbm_num_leaves': 66, 'xgb_n_estimators': 642, 'xgb_learning_rate': 0.18667833979459408, 'xgb_max_depth': 7, 'cat_iterations': 797, 'cat_learning_rate': 0.11859905358553664, 'cat_depth': 9, 'w_lgbm': 0.5460955865305486, 'w_xgb': 0.5477806826991831, 'w_cat': 0.6326286085373166}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:19,315] Trial 22 finished with value: 0.1623870097786469 and parameters: {'lgbm_n_estimators': 746, 'lgbm_learning_rate': 0.2649632637429836, 'lgbm_num_leaves': 64, 'xgb_n_estimators': 634, 'xgb_learning_rate': 0.19096880341836117, 'xgb_max_depth': 7, 'cat_iterations': 769, 'cat_learning_rate': 0.07042006666813469, 'cat_depth': 9, 'w_lgbm': 0.5414156282391823, 'w_xgb': 0.5544305026868849, 'w_cat': 0.6558781524888957}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000481 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:25,871] Trial 23 finished with value: 0.16856945774712198 and parameters: {'lgbm_n_estimators': 648, 'lgbm_learning_rate': 0.23698102224629278, 'lgbm_num_leaves': 68, 'xgb_n_estimators': 485, 'xgb_learning_rate': 0.25126731936000923, 'xgb_max_depth': 5, 'cat_iterations': 618, 'cat_learning_rate': 0.11757850028969041, 'cat_depth': 8, 'w_lgbm': 0.4400531562161077, 'w_xgb': 0.419933067537171, 'w_cat': 0.4796184816836555}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000988 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:34,329] Trial 24 finished with value: 0.16468252003311556 and parameters: {'lgbm_n_estimators': 928, 'lgbm_learning_rate': 0.2768043022233817, 'lgbm_num_leaves': 49, 'xgb_n_estimators': 654, 'xgb_learning_rate': 0.17740457292697245, 'xgb_max_depth': 7, 'cat_iterations': 665, 'cat_learning_rate': 0.07174494039082074, 'cat_depth': 9, 'w_lgbm': 0.3255938568844248, 'w_xgb': 0.5958068284061975, 'w_cat': 0.593370941296554}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000231 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:38,905] Trial 25 finished with value: 0.16948748267953903 and parameters: {'lgbm_n_estimators': 803, 'lgbm_learning_rate': 0.19696874580191256, 'lgbm_num_leaves': 57, 'xgb_n_estimators': 486, 'xgb_learning_rate': 0.24281919211072578, 'xgb_max_depth': 8, 'cat_iterations': 779, 'cat_learning_rate': 0.05298396690708132, 'cat_depth': 7, 'w_lgbm': 0.5865646716895068, 'w_xgb': 0.49272659826132675, 'w_cat': 0.28591236988555513}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000565 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:48,609] Trial 26 finished with value: 0.16302264718367923 and parameters: {'lgbm_n_estimators': 693, 'lgbm_learning_rate': 0.22418461425201083, 'lgbm_num_leaves': 80, 'xgb_n_estimators': 785, 'xgb_learning_rate': 0.19682106377015712, 'xgb_max_depth': 5, 'cat_iterations': 555, 'cat_learning_rate': 0.1541746605763542, 'cat_depth': 9, 'w_lgbm': 0.15568673862953375, 'w_xgb': 0.0034314446255095943, 'w_cat': 0.42162468542581666}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000191 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:55,548] Trial 27 finished with value: 0.16389586689757019 and parameters: {'lgbm_n_estimators': 580, 'lgbm_learning_rate': 0.2627267925701176, 'lgbm_num_leaves': 66, 'xgb_n_estimators': 335, 'xgb_learning_rate': 0.15035289063788493, 'xgb_max_depth': 7, 'cat_iterations': 932, 'cat_learning_rate': 0.1290222951562741, 'cat_depth': 8, 'w_lgbm': 0.4819814569661878, 'w_xgb': 0.2130403630143478, 'w_cat': 0.5336616960487612}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000258 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:42:58,401] Trial 28 finished with value: 0.18384484126099665 and parameters: {'lgbm_n_estimators': 908, 'lgbm_learning_rate': 0.2964816788043921, 'lgbm_num_leaves': 50, 'xgb_n_estimators': 188, 'xgb_learning_rate': 0.253376477560593, 'xgb_max_depth': 5, 'cat_iterations': 376, 'cat_learning_rate': 0.10028871969849555, 'cat_depth': 6, 'w_lgbm': 0.28942772948112555, 'w_xgb': 0.3836293775351264, 'w_cat': 0.6351609596576759}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002073 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:43:19,767] Trial 29 finished with value: 0.16274323191860834 and parameters: {'lgbm_n_estimators': 784, 'lgbm_learning_rate': 0.1742688990465838, 'lgbm_num_leaves': 39, 'xgb_n_estimators': 272, 'xgb_learning_rate': 0.1730244836733803, 'xgb_max_depth': 8, 'cat_iterations': 808, 'cat_learning_rate': 0.19474954851547804, 'cat_depth': 10, 'w_lgbm': 0.36105676514030655, 'w_xgb': 0.29620432456783147, 'w_cat': 0.20217817680003788}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000255 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:43:26,593] Trial 30 finished with value: 0.16257673022723595 and parameters: {'lgbm_n_estimators': 790, 'lgbm_learning_rate': 0.19908838155729316, 'lgbm_num_leaves': 35, 'xgb_n_estimators': 409, 'xgb_learning_rate': 0.1225246565070916, 'xgb_max_depth': 7, 'cat_iterations': 557, 'cat_learning_rate': 0.1781238902984914, 'cat_depth': 9, 'w_lgbm': 0.4198972550369992, 'w_xgb': 0.13510372872654342, 'w_cat': 0.9822037060451541}. Best is trial 4 with value: 0.1584136727393995.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000350 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:43:51,521] Trial 31 finished with value: 0.15760591279516745 and parameters: {'lgbm_n_estimators': 445, 'lgbm_learning_rate': 0.2854195326363704, 'lgbm_num_leaves': 88, 'xgb_n_estimators': 609, 'xgb_learning_rate': 0.1863474925075952, 'xgb_max_depth': 10, 'cat_iterations': 902, 'cat_learning_rate': 0.2097028668825592, 'cat_depth': 10, 'w_lgbm': 0.6375011954477277, 'w_xgb': 0.696300690613558, 'w_cat': 0.8159784846992915}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:44:16,550] Trial 32 finished with value: 0.15812858515431463 and parameters: {'lgbm_n_estimators': 461, 'lgbm_learning_rate': 0.2834154328013379, 'lgbm_num_leaves': 89, 'xgb_n_estimators': 603, 'xgb_learning_rate': 0.1876913539056346, 'xgb_max_depth': 9, 'cat_iterations': 925, 'cat_learning_rate': 0.20015637123641752, 'cat_depth': 10, 'w_lgbm': 0.637297894577867, 'w_xgb': 0.6335399439718007, 'w_cat': 0.6872445303137114}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000394 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:44:40,457] Trial 33 finished with value: 0.1611245832055528 and parameters: {'lgbm_n_estimators': 481, 'lgbm_learning_rate': 0.27451347582141833, 'lgbm_num_leaves': 90, 'xgb_n_estimators': 523, 'xgb_learning_rate': 0.1607796042631588, 'xgb_max_depth': 9, 'cat_iterations': 943, 'cat_learning_rate': 0.20825290766341303, 'cat_depth': 10, 'w_lgbm': 0.6374837788169923, 'w_xgb': 0.8787804571523777, 'w_cat': 0.8100034213003111}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000455 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:45:04,302] Trial 34 finished with value: 0.1644779829228753 and parameters: {'lgbm_n_estimators': 473, 'lgbm_learning_rate': 0.27669920931108444, 'lgbm_num_leaves': 85, 'xgb_n_estimators': 582, 'xgb_learning_rate': 0.2087792950882033, 'xgb_max_depth': 10, 'cat_iterations': 916, 'cat_learning_rate': 0.21386621833205197, 'cat_depth': 10, 'w_lgbm': 0.7919708333628841, 'w_xgb': 0.637590077441236, 'w_cat': 0.853948848258266}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000501 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:45:27,320] Trial 35 finished with value: 0.15963308575811477 and parameters: {'lgbm_n_estimators': 530, 'lgbm_learning_rate': 0.25663108652191075, 'lgbm_num_leaves': 92, 'xgb_n_estimators': 593, 'xgb_learning_rate': 0.1952678896078644, 'xgb_max_depth': 9, 'cat_iterations': 879, 'cat_learning_rate': 0.17301800112236182, 'cat_depth': 10, 'w_lgbm': 0.621386669474079, 'w_xgb': 0.7527596445722707, 'w_cat': 0.6849597710917432}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000379 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:45:50,516] Trial 36 finished with value: 0.1611535277660533 and parameters: {'lgbm_n_estimators': 546, 'lgbm_learning_rate': 0.23168106473854655, 'lgbm_num_leaves': 100, 'xgb_n_estimators': 554, 'xgb_learning_rate': 0.2294426795359299, 'xgb_max_depth': 9, 'cat_iterations': 871, 'cat_learning_rate': 0.22975357390294995, 'cat_depth': 10, 'w_lgbm': 0.8783427490712054, 'w_xgb': 0.7382865340405791, 'w_cat': 0.7048237901651984}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:45:53,367] Trial 37 finished with value: 0.16582227625770762 and parameters: {'lgbm_n_estimators': 426, 'lgbm_learning_rate': 0.26268648467131106, 'lgbm_num_leaves': 91, 'xgb_n_estimators': 472, 'xgb_learning_rate': 0.2016210849903864, 'xgb_max_depth': 10, 'cat_iterations': 899, 'cat_learning_rate': 0.165784475728779, 'cat_depth': 4, 'w_lgbm': 0.7562355331840586, 'w_xgb': 0.8508206394373404, 'w_cat': 0.5360993269798251}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:46:19,401] Trial 38 finished with value: 0.16008104352016508 and parameters: {'lgbm_n_estimators': 352, 'lgbm_learning_rate': 0.25019785253746585, 'lgbm_num_leaves': 80, 'xgb_n_estimators': 700, 'xgb_learning_rate': 0.1689962487951829, 'xgb_max_depth': 9, 'cat_iterations': 969, 'cat_learning_rate': 0.1949438807882592, 'cat_depth': 10, 'w_lgbm': 0.6471429159055582, 'w_xgb': 0.763296672828053, 'w_cat': 0.8753725529552935}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:46:42,564] Trial 39 finished with value: 0.1667002917823445 and parameters: {'lgbm_n_estimators': 634, 'lgbm_learning_rate': 0.011471476076933912, 'lgbm_num_leaves': 89, 'xgb_n_estimators': 375, 'xgb_learning_rate': 0.2990996180850034, 'xgb_max_depth': 8, 'cat_iterations': 869, 'cat_learning_rate': 0.26286183782369993, 'cat_depth': 10, 'w_lgbm': 0.5993950431687285, 'w_xgb': 0.9849193553545913, 'w_cat': 0.9942361006093173}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000299 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:46:47,845] Trial 40 finished with value: 0.16648594868477928 and parameters: {'lgbm_n_estimators': 521, 'lgbm_learning_rate': 0.2824871813044612, 'lgbm_num_leaves': 95, 'xgb_n_estimators': 530, 'xgb_learning_rate': 0.2132847839579011, 'xgb_max_depth': 10, 'cat_iterations': 838, 'cat_learning_rate': 0.20828109447655885, 'cat_depth': 5, 'w_lgbm': 0.8202462850618473, 'w_xgb': 0.6739299538629406, 'w_cat': 0.02209248342057346}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000273 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:47:11,913] Trial 41 finished with value: 0.16250194209087637 and parameters: {'lgbm_n_estimators': 335, 'lgbm_learning_rate': 0.2503322873226936, 'lgbm_num_leaves': 82, 'xgb_n_estimators': 692, 'xgb_learning_rate': 0.16183438584518406, 'xgb_max_depth': 9, 'cat_iterations': 967, 'cat_learning_rate': 0.1907092192177154, 'cat_depth': 10, 'w_lgbm': 0.697278868873054, 'w_xgb': 0.7696754910401591, 'w_cat': 0.8960247611165377}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000431 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:47:22,933] Trial 42 finished with value: 0.16219182711161204 and parameters: {'lgbm_n_estimators': 344, 'lgbm_learning_rate': 0.25562083245372025, 'lgbm_num_leaves': 75, 'xgb_n_estimators': 748, 'xgb_learning_rate': 0.16815315850791315, 'xgb_max_depth': 9, 'cat_iterations': 959, 'cat_learning_rate': 0.2015450433954144, 'cat_depth': 9, 'w_lgbm': 0.6631259867196351, 'w_xgb': 0.7236698510094052, 'w_cat': 0.7752544373629969}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000254 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:47:45,963] Trial 43 finished with value: 0.16353080838274783 and parameters: {'lgbm_n_estimators': 447, 'lgbm_learning_rate': 0.23861361904331976, 'lgbm_num_leaves': 92, 'xgb_n_estimators': 591, 'xgb_learning_rate': 0.20261271511751847, 'xgb_max_depth': 10, 'cat_iterations': 905, 'cat_learning_rate': 0.17478041680825399, 'cat_depth': 10, 'w_lgbm': 0.009296278521896816, 'w_xgb': 0.8462571412338666, 'w_cat': 0.8464650302607765}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:48:12,344] Trial 44 finished with value: 0.1626387015338236 and parameters: {'lgbm_n_estimators': 386, 'lgbm_learning_rate': 0.2841974012476829, 'lgbm_num_leaves': 86, 'xgb_n_estimators': 833, 'xgb_learning_rate': 0.13921726213013752, 'xgb_max_depth': 9, 'cat_iterations': 996, 'cat_learning_rate': 0.22241785852035395, 'cat_depth': 10, 'w_lgbm': 0.7329453003995294, 'w_xgb': 0.6445558017993855, 'w_cat': 0.9453632027774064}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:48:18,847] Trial 45 finished with value: 0.1625278250757241 and parameters: {'lgbm_n_estimators': 256, 'lgbm_learning_rate': 0.21392896726184568, 'lgbm_num_leaves': 97, 'xgb_n_estimators': 687, 'xgb_learning_rate': 0.219758082002887, 'xgb_max_depth': 10, 'cat_iterations': 479, 'cat_learning_rate': 0.18153387618066377, 'cat_depth': 9, 'w_lgbm': 0.6257295020685888, 'w_xgb': 0.805513425644974, 'w_cat': 0.7271613783895305}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:48:40,544] Trial 46 finished with value: 0.16196762669591278 and parameters: {'lgbm_n_estimators': 178, 'lgbm_learning_rate': 0.18889145507583419, 'lgbm_num_leaves': 77, 'xgb_n_estimators': 274, 'xgb_learning_rate': 0.15399335172061862, 'xgb_max_depth': 8, 'cat_iterations': 843, 'cat_learning_rate': 0.1578756612614363, 'cat_depth': 10, 'w_lgbm': 0.4772358647958146, 'w_xgb': 0.9098820947111352, 'w_cat': 0.8846648442201139}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000273 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:48:51,268] Trial 47 finished with value: 0.16367025132067564 and parameters: {'lgbm_n_estimators': 298, 'lgbm_learning_rate': 0.28638593361807485, 'lgbm_num_leaves': 82, 'xgb_n_estimators': 728, 'xgb_learning_rate': 0.1217222558834314, 'xgb_max_depth': 9, 'cat_iterations': 883, 'cat_learning_rate': 0.14432508060700538, 'cat_depth': 9, 'w_lgbm': 0.5734505496222456, 'w_xgb': 0.682937277405536, 'w_cat': 0.683703320997544}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000464 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:48:54,211] Trial 48 finished with value: 0.16864553087800419 and parameters: {'lgbm_n_estimators': 366, 'lgbm_learning_rate': 0.22589968000234145, 'lgbm_num_leaves': 72, 'xgb_n_estimators': 455, 'xgb_learning_rate': 0.17972594259955477, 'xgb_max_depth': 8, 'cat_iterations': 731, 'cat_learning_rate': 0.22905908005350736, 'cat_depth': 4, 'w_lgbm': 0.9578536499077765, 'w_xgb': 0.6162655731045845, 'w_cat': 0.582314671878671}. Best is trial 31 with value: 0.15760591279516745.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001864 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 21:49:00,214] Trial 49 finished with value: 0.16109116939006468 and parameters: {'lgbm_n_estimators': 533, 'lgbm_learning_rate': 0.15218947242869105, 'lgbm_num_leaves': 88, 'xgb_n_estimators': 583, 'xgb_learning_rate': 0.2700175933375156, 'xgb_max_depth': 10, 'cat_iterations': 180, 'cat_learning_rate': 0.24360926943894337, 'cat_depth': 10, 'w_lgbm': 0.6632055891012665, 'w_xgb': 0.7785656763752051, 'w_cat': 0.8181059531024146}. Best is trial 31 with value: 0.15760591279516745.


Best trial:
FrozenTrial(number=31, state=1, values=[0.15760591279516745], datetime_start=datetime.datetime(2025, 7, 23, 21, 43, 26, 594258), datetime_complete=datetime.datetime(2025, 7, 23, 21, 43, 51, 521896), params={'lgbm_n_estimators': 445, 'lgbm_learning_rate': 0.2854195326363704, 'lgbm_num_leaves': 88, 'xgb_n_estimators': 609, 'xgb_learning_rate': 0.1863474925075952, 'xgb_max_depth': 10, 'cat_iterations': 902, 'cat_learning_rate': 0.2097028668825592, 'cat_depth': 10, 'w_lgbm': 0.6375011954477277, 'w_xgb': 0.696300690613558, 'w_cat': 0.8159784846992915}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lgbm_n_estimators': IntDistribution(high=1000, log=False, low=100, step=1), 'lgbm_learning_rate': FloatDistribution(high=0.3, log=False, low=0.01, step=None), 'lgbm_num_leaves': IntDistribution(high=100, log=False, low=20, step=1), 'xgb_n_estimators': IntDistribution(high=1000, log=False, low=100, step=1), 'xgb_learning_rate': FloatDistribution(high=0.3, log=F

In [24]:
# 1. 최적 파라미터 추출
best_params = study.best_trial.params

# 2. 모델 생성 (최적 파라미터 적용)
lgbm = LGBMRegressor(
    n_estimators=best_params['lgbm_n_estimators'],
    learning_rate=best_params['lgbm_learning_rate'],
    num_leaves=best_params['lgbm_num_leaves'],
    random_state=42
)
xgb = XGBRegressor(
    n_estimators=best_params['xgb_n_estimators'],
    learning_rate=best_params['xgb_learning_rate'],
    max_depth=best_params['xgb_max_depth'],
    random_state=42
)
cat = CatBoostRegressor(
    iterations=best_params['cat_iterations'],
    learning_rate=best_params['cat_learning_rate'],
    depth=best_params['cat_depth'],
    random_seed=42,
    verbose=0
)

# 3. 전체 train 데이터로 재학습
X_full = train.drop(['ID', 'stress_score'], axis=1, errors='ignore')
y_full = train['stress_score']

lgbm.fit(X_full, y_full)
xgb.fit(X_full, y_full)
cat.fit(X_full, y_full)

# 4. test 데이터 예측
pred_lgbm = lgbm.predict(test)
pred_xgb = xgb.predict(test)
pred_cat = cat.predict(test)

# 5. 앙상블 가중치 정규화
w_lgbm = best_params['w_lgbm']
w_xgb = best_params['w_xgb']
w_cat = best_params['w_cat']
total = w_lgbm + w_xgb + w_cat
w_lgbm /= total
w_xgb /= total
w_cat /= total

# 6. 앙상블 예측
pred_ensemble = w_lgbm * pred_lgbm + w_xgb * pred_xgb + w_cat * pred_cat

# 7. 제출 파일 생성
submission = pd.read_csv('./sample_submission.csv')
submission['stress_score'] = pred_ensemble
submission.to_csv('submit.csv', index=False)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1842
[LightGBM] [Info] Number of data points in the train set: 3000, number of used features: 30
[LightGBM] [Info] Start training from score 0.482130


In [None]:
# importances = lgbm.feature_importances_
# feature_names = x_train.columns

# importance_df = pd.DataFrame({
#     'Feature': feature_names,
#     'Importance': importances
# }).sort_values(by='Importance', ascending=False)

# plt.figure(figsize=(10, 6))
# sns.barplot(data=importance_df.head(20), x='Importance', y='Feature')
# plt.title('Top 20 Feature Importances (LGBM)')
# plt.show()

In [None]:
submission = pd.read_csv('./sample_submission.csv')

In [None]:
submission['stress_score'] = pred
submission.head()

Unnamed: 0,ID,stress_score
0,TEST_0000,0.495695
1,TEST_0001,0.626018
2,TEST_0002,0.230449
3,TEST_0003,0.447243
4,TEST_0004,0.525021


In [None]:
submission.to_csv('submit.csv', index=False)

In [None]:
# train에서 나눠서 데이콘에서 측정하는 MAE와 다를수있음
# 성능이 올라갔는지 그대로인지 내려갔는지 경향만 파악할 수 있음
# 확인용으로만 쓸것

import datetime

# MAE 로그파일 세팅
def loging(MAE):
    with open('./mae_log', mode='a', encoding='utf-8') as f:
        current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        f.write(f"시간: {current_time} / MAE: {MAE}\n")

# MAE 체크용
train_set, val_set = train_test_split(train, test_size=0.2, random_state=42)

X_train = train_set.drop(['ID', 'stress_score'], axis=1)
y_train = train_set['stress_score']

X_val = val_set.drop(['ID', 'stress_score'], axis=1) 
y_val = val_set['stress_score']

lgbm = LGBMRegressor(random_state=42)
lgbm.fit(X_train, y_train)

val_pred = lgbm.predict(X_val)

MAE = mean_absolute_error(y_val, val_pred)

print(f"모델의 검증 MAE 점수: {MAE}")
loging(MAE)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1832
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 30
[LightGBM] [Info] Start training from score 0.481912
모델의 검증 MAE 점수: 0.20487542542712187
