## 1. 데이터 불러오기

In [1]:
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [2]:
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

"""
[train.csv]                                         [test.csv]
ID : 샘플별 고유 ID                                 ID : 샘플별 고유 ID
gender : 성별                                       gender : 성별
age : 연령                                          age : 연령
height : 키(cm)                                     height : 키(cm)
weight : 몸무게(kg)                                 weight : 몸무게(kg)
cholesterol : 콜레스테롤 수치                       cholesterol : 콜레스테롤 수치
systolic_blood_pressure : 수축기 혈압               systolic_blood_pressure : 수축기 혈압
diastolic_blood_pressure : 이완기 혈압              diastolic_blood_pressure : 이완기 혈압
glucose : 혈당 수치(mg/dL)                          glucose : 혈당 수치(mg/dL)
bone_density : 골밀도(g/cm²)                        bone_density : 골밀도(g/cm²)
activity : 생활시 운동 강도                         activity : 생활시 운동 강도
smoke_status : 흡연 상태                            smoke_status : 흡연 상태
medical_history : 만성질환                          medical_history : 만성질환
family_medical_history : 가족력                     family_medical_history : 가족력
sleep_pattern : 수면패턴                            sleep_pattern : 수면패턴
edu_level : 학력                                    edu_level : 학력
mean_working : 1주일당 평균 근로 시간               mean_working : 1주일당 평균 근로 시간
stress_score : (TARGET) 스트레스 점수                    
"""

display(train.shape)

(3000, 18)

## 2. 데이터 전처리

In [3]:
# 카테고리형 변수의 결측값을 최빈값으로 대체
# 최빈값 -> 가장 많이 관측된 값
categorical_na_cols = train.select_dtypes(include=['object', 'category']).columns.drop('ID')

for col in categorical_na_cols:
    # # 학습 데이터에서 최빈값 계산
    # most_frequent = train[col].mode()[0]
    # print(f"{col}: {most_frequent}")
    # # 학습 데이터와 테스트 데이터 모두 해당 칼럼의 최빈값으로 대체
    # train[col] = train[col].fillna(most_frequent)
    # test[col] = test[col].fillna(most_frequent)

    train[col] = train[col].fillna('None')
    test[col] = test[col].fillna('None')

    print(f"{col}: {train[col].unique()}")

gender: ['F' 'M']
activity: ['moderate' 'light' 'intense']
smoke_status: ['ex-smoker' 'non-smoker' 'current-smoker']
medical_history: ['high blood pressure' 'None' 'diabetes' 'heart disease']
family_medical_history: ['diabetes' 'None' 'high blood pressure' 'heart disease']
sleep_pattern: ['sleep difficulty' 'normal' 'oversleeping']
edu_level: ['bachelors degree' 'graduate degree' 'high school diploma' 'None']


In [6]:
# mean_working이 NaN이거나 0이면 0, 아니면 1로 is_working 컬럼 추가
train['is_working'] = ((train['mean_working'].fillna(0)) != 0).astype(int)
test['is_working'] = ((test['mean_working'].fillna(0)) != 0).astype(int)

In [7]:
# 1. 컬럼 목록에서 is_working이 있는지 확인
print('is_working' in train.columns)  # True면 정상 추가

# 2. train 데이터프레임의 상위 5개 행에서 is_working 컬럼 확인
print(train[['mean_working', 'is_working']].head())

# 3. is_working 컬럼의 값 분포(0/1 개수) 확인
print(train['is_working'].value_counts())

True
   mean_working  is_working
0           NaN           0
1           NaN           0
2           9.0           1
3           NaN           0
4           NaN           0
is_working
1    1968
0    1032
Name: count, dtype: int64


In [8]:
# mean_working에 대해 중앙값 대체
# median_value = train['mean_working'].median()

# mean_working 결측치 -> 0
# train['mean_working'] = train['mean_working'].fillna(0)
# test['mean_working'] = test['mean_working'].fillna(0)

# mean_working 결측치를 평균값으로 대체
mean_value = train['mean_working'].mean()
train['mean_working'] = train['mean_working'].fillna(mean_value)
test['mean_working'] = test['mean_working'].fillna(mean_value)

display(train.columns)

Index(['ID', 'gender', 'age', 'height', 'weight', 'cholesterol',
       'systolic_blood_pressure', 'diastolic_blood_pressure', 'glucose',
       'bone_density', 'activity', 'smoke_status', 'medical_history',
       'family_medical_history', 'sleep_pattern', 'edu_level', 'mean_working',
       'stress_score', 'is_working'],
      dtype='object')

In [9]:
# One-Hot Encoding 적용 열 - gender, smoke_status, medical_history, family_medical_history
# 원핫 인코딩 => 카테고리끼리 순서 없을때 사용
# smoke_status는 좀 애매하긴 한듯
def one_hot_encoding(df):
    one_hot_encoding_cols = ['gender', 'smoke_status', 'medical_history', 'family_medical_history']
    encoding_df = pd.get_dummies(df, columns=one_hot_encoding_cols)
    return encoding_df

train = one_hot_encoding(train)
test = one_hot_encoding(test)

display(train.columns)
display(train.shape)

Index(['ID', 'age', 'height', 'weight', 'cholesterol',
       'systolic_blood_pressure', 'diastolic_blood_pressure', 'glucose',
       'bone_density', 'activity', 'sleep_pattern', 'edu_level',
       'mean_working', 'stress_score', 'is_working', 'gender_F', 'gender_M',
       'smoke_status_current-smoker', 'smoke_status_ex-smoker',
       'smoke_status_non-smoker', 'medical_history_None',
       'medical_history_diabetes', 'medical_history_heart disease',
       'medical_history_high blood pressure', 'family_medical_history_None',
       'family_medical_history_diabetes',
       'family_medical_history_heart disease',
       'family_medical_history_high blood pressure'],
      dtype='object')

(3000, 28)

In [10]:
# Label Encoding 적용 열 - activity, sleep_pattern, edu_level
# 라벨 인코딩 => 카테고리끼리 순서 있을때 사용
def label_encoding(df):
    label_encoding_cols = ['activity', 'sleep_pattern', 'edu_level']
    for feature in label_encoding_cols:
        le = LabelEncoder()
        le.fit(df[feature])
        df[feature] = le.transform(df[feature])
    return df

train = label_encoding(train)
test = label_encoding(test)

In [11]:
# 모든 bool 컬럼을 int로 변환
for col in train.columns:
    if train[col].dtype == 'bool':
        train[col] = train[col].astype(int)
for col in test.columns:
    if test[col].dtype == 'bool':
        test[col] = test[col].astype(int)

In [12]:
train

Unnamed: 0,ID,age,height,weight,cholesterol,systolic_blood_pressure,diastolic_blood_pressure,glucose,bone_density,activity,...,smoke_status_ex-smoker,smoke_status_non-smoker,medical_history_None,medical_history_diabetes,medical_history_heart disease,medical_history_high blood pressure,family_medical_history_None,family_medical_history_diabetes,family_medical_history_heart disease,family_medical_history_high blood pressure
0,TRAIN_0000,72,161.49,58.47,279.84,165,100,143.35,0.87,2,...,1,0,0,0,0,1,0,1,0,0
1,TRAIN_0001,88,179.87,77.60,257.37,178,111,146.94,0.07,2,...,1,0,1,0,0,0,0,1,0,0
2,TRAIN_0002,47,182.47,89.93,226.66,134,95,142.61,1.18,1,...,1,0,1,0,0,0,1,0,0,0
3,TRAIN_0003,69,185.78,68.63,206.74,158,92,137.26,0.48,0,...,1,0,0,0,0,1,1,0,0,0
4,TRAIN_0004,81,164.63,71.53,255.92,171,116,129.37,0.34,2,...,1,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,TRAIN_2995,87,174.43,77.14,233.31,176,111,134.71,0.20,1,...,0,1,1,0,0,0,1,0,0,0
2996,TRAIN_2996,25,171.44,51.67,196.89,143,86,111.91,1.75,2,...,0,0,1,0,0,0,1,0,0,0
2997,TRAIN_2997,54,162.13,55.54,210.52,164,91,136.47,0.80,1,...,0,0,1,0,0,0,1,0,0,0
2998,TRAIN_2998,70,181.32,82.20,234.10,182,91,144.89,0.58,2,...,0,1,1,0,0,0,1,0,0,0


## 3. 피처 엔지니어링

In [13]:
# BMI => 몸무게[kg] / 키[cm]^2
# 혈압차(맥압) => 수축성 혈압 - 이완성 혈압
def add_features(df):
    df['BMI'] = df['weight'] / ((df['height']/100)**2)
    df['bp_diff'] = df['systolic_blood_pressure'] - df['diastolic_blood_pressure']
    return df

train = add_features(train)
test = add_features(test)

In [14]:
display(train['mean_working'].describe())

# working_group 피처 만들기
bins = [3, 8, 9, 16]  # 적절한 경계값 설정
labels = [0, 1, 2]     # 저, 중하, 중상, 고 근무

train['working_group'] = pd.cut(train['mean_working'], bins=bins, labels=labels, include_lowest=True)
test['working_group'] = pd.cut(test['mean_working'], bins=bins, labels=labels, include_lowest=True)

overwork_threshold = train['mean_working'].quantile(0.95)  # 상위 5% 기준값
train['is_overwork'] = (train['mean_working'] >= overwork_threshold).astype(int)
test['is_overwork'] = (test['mean_working'] >= overwork_threshold).astype(int)

display(train['working_group'].value_counts())
display(train['is_overwork'].value_counts())

count    3000.000000
mean        8.716972
std         1.319229
min         4.000000
25%         8.000000
50%         8.716972
75%         9.000000
max        16.000000
Name: mean_working, dtype: float64

working_group
1    1569
0     888
2     543
Name: count, dtype: int64

is_overwork
0    2803
1     197
Name: count, dtype: int64

In [15]:
# mean_working & sleep_pattern 피처 엔지니어링

train['work_sleep_imbalance'] = train['mean_working'] / (train['sleep_pattern'] + 1)
test['work_sleep_imbalance'] = test['mean_working'] / (test['sleep_pattern'] + 1)

display(train[['work_sleep_imbalance', 'stress_score']])

for col in train.columns:
    if col not in ['ID', 'stress_score']:
        corr_val = train['stress_score'].corr(train[col])
        print(f"{col} ↔ stress_score 상관계수: {corr_val:.4f}")

Unnamed: 0,work_sleep_imbalance,stress_score
0,2.905657,0.63
1,8.716972,0.83
2,9.000000,0.70
3,4.358486,0.17
4,2.905657,0.36
...,...,...
2995,4.358486,0.02
2996,9.000000,0.16
2997,9.000000,0.16
2998,4.358486,0.18


age ↔ stress_score 상관계수: 0.0187
height ↔ stress_score 상관계수: -0.0057
weight ↔ stress_score 상관계수: 0.0113
cholesterol ↔ stress_score 상관계수: 0.0213
systolic_blood_pressure ↔ stress_score 상관계수: 0.0156
diastolic_blood_pressure ↔ stress_score 상관계수: 0.0254
glucose ↔ stress_score 상관계수: -0.0061
bone_density ↔ stress_score 상관계수: -0.0226
activity ↔ stress_score 상관계수: 0.0070
sleep_pattern ↔ stress_score 상관계수: 0.0002
edu_level ↔ stress_score 상관계수: -0.0271
mean_working ↔ stress_score 상관계수: 0.1507
is_working ↔ stress_score 상관계수: -0.0228
gender_F ↔ stress_score 상관계수: 0.0136
gender_M ↔ stress_score 상관계수: -0.0136
smoke_status_current-smoker ↔ stress_score 상관계수: 0.0261
smoke_status_ex-smoker ↔ stress_score 상관계수: -0.0320
smoke_status_non-smoker ↔ stress_score 상관계수: 0.0087
medical_history_None ↔ stress_score 상관계수: -0.0504
medical_history_diabetes ↔ stress_score 상관계수: 0.0245
medical_history_heart disease ↔ stress_score 상관계수: 0.0000
medical_history_high blood pressure ↔ stress_score 상관계수: 0.0373
family_medical

## 4. 모델링

In [16]:
from sklearn.model_selection import train_test_split

X = train.drop(['ID', 'stress_score'], axis=1)
y = train['stress_score']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
# x_train = train.drop(['ID', 'stress_score'], axis = 1)
# x_val = train['stress_score']
# y_train = train['stress_score']

test = test.drop('ID', axis = 1)

In [18]:
# lgbm = LGBMRegressor(random_state = 42)
# lgbm.fit(x_train, y_train)

# pred = lgbm.predict(test)

In [19]:
# category 타입 컬럼을 int로 변환
for col in X_train.columns:
    if str(X_train[col].dtype) == 'category':
        X_train[col] = X_train[col].astype(int)
        X_val[col] = X_val[col].astype(int)

# train, test의 category 타입 컬럼을 int로 변환
for col in train.columns:
    if str(train[col].dtype) == 'category':
        train[col] = train[col].astype(int)
for col in test.columns:
    if str(test[col].dtype) == 'category':
        test[col] = test[col].astype(int)

In [20]:
import optuna
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

def objective(trial):
    # 하이퍼파라미터 샘플링
    lgbm_params = {
        'n_estimators': trial.suggest_int('lgbm_n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('lgbm_learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('lgbm_num_leaves', 20, 100),
        'random_state': 42
    }
    xgb_params = {
        'n_estimators': trial.suggest_int('xgb_n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('xgb_max_depth', 3, 10),
        'random_state': 42
    }
    cat_params = {
        'iterations': trial.suggest_int('cat_iterations', 100, 1000),
        'learning_rate': trial.suggest_float('cat_learning_rate', 0.01, 0.3),
        'depth': trial.suggest_int('cat_depth', 3, 10),
        'random_seed': 42,
        'verbose': 0
    }

    # 모델 학습
    lgbm = LGBMRegressor(**lgbm_params)
    xgb = XGBRegressor(**xgb_params)
    cat = CatBoostRegressor(**cat_params)

    lgbm.fit(X_train, y_train)
    xgb.fit(X_train, y_train)
    cat.fit(X_train, y_train)

    # 예측
    pred_lgbm = lgbm.predict(X_val)
    pred_xgb = xgb.predict(X_val)
    pred_cat = cat.predict(X_val)

    # 앙상블 가중치 샘플링 및 정규화
    w_lgbm = trial.suggest_float('w_lgbm', 0, 1)
    w_xgb = trial.suggest_float('w_xgb', 0, 1)
    w_cat = trial.suggest_float('w_cat', 0, 1)
    total = w_lgbm + w_xgb + w_cat
    w_lgbm /= total
    w_xgb /= total
    w_cat /= total

    # 앙상블 예측
    pred_ensemble = w_lgbm * pred_lgbm + w_xgb * pred_xgb + w_cat * pred_cat

    # MAE 계산
    mae = mean_absolute_error(y_val, pred_ensemble)
    return mae

# Optuna Study 생성 및 최적화
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

print('Best trial:')
print(study.best_trial)
print('Best params:', study.best_trial.params)

[I 2025-07-23 22:46:29,301] A new study created in memory with name: no-name-5992b53e-d579-4b58-acbd-70cd9bf9a66a


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:33,359] Trial 0 finished with value: 0.17805412300276183 and parameters: {'lgbm_n_estimators': 200, 'lgbm_learning_rate': 0.2380929064513637, 'lgbm_num_leaves': 24, 'xgb_n_estimators': 709, 'xgb_learning_rate': 0.2176778754078496, 'xgb_max_depth': 3, 'cat_iterations': 253, 'cat_learning_rate': 0.08700465236480157, 'cat_depth': 9, 'w_lgbm': 0.8484450625046581, 'w_xgb': 0.017097546201997882, 'w_cat': 0.8680394893324958}. Best is trial 0 with value: 0.17805412300276183.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:35,407] Trial 1 finished with value: 0.19401169550204178 and parameters: {'lgbm_n_estimators': 687, 'lgbm_learning_rate': 0.02258560628412292, 'lgbm_num_leaves': 85, 'xgb_n_estimators': 697, 'xgb_learning_rate': 0.07459352603785672, 'xgb_max_depth': 5, 'cat_iterations': 173, 'cat_learning_rate': 0.13035545360245196, 'cat_depth': 5, 'w_lgbm': 0.517739420120429, 'w_xgb': 0.10346682329568502, 'w_cat': 0.8133431564654362}. Best is trial 0 with value: 0.17805412300276183.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000275 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:37,450] Trial 2 finished with value: 0.17007482263214702 and parameters: {'lgbm_n_estimators': 489, 'lgbm_learning_rate': 0.18770303210549255, 'lgbm_num_leaves': 55, 'xgb_n_estimators': 720, 'xgb_learning_rate': 0.20962736579665264, 'xgb_max_depth': 7, 'cat_iterations': 991, 'cat_learning_rate': 0.20954135679438735, 'cat_depth': 4, 'w_lgbm': 0.9894995969115068, 'w_xgb': 0.3913115664066905, 'w_cat': 0.864408431542625}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:51,423] Trial 3 finished with value: 0.17638532883792468 and parameters: {'lgbm_n_estimators': 748, 'lgbm_learning_rate': 0.04217809262607204, 'lgbm_num_leaves': 91, 'xgb_n_estimators': 130, 'xgb_learning_rate': 0.03923330144603688, 'xgb_max_depth': 3, 'cat_iterations': 695, 'cat_learning_rate': 0.09557824808979423, 'cat_depth': 10, 'w_lgbm': 0.2727723366470519, 'w_xgb': 0.29848577691329214, 'w_cat': 0.625137270816107}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:52,522] Trial 4 finished with value: 0.17469264651404948 and parameters: {'lgbm_n_estimators': 746, 'lgbm_learning_rate': 0.22632645786075026, 'lgbm_num_leaves': 45, 'xgb_n_estimators': 396, 'xgb_learning_rate': 0.2737323470896201, 'xgb_max_depth': 5, 'cat_iterations': 254, 'cat_learning_rate': 0.28242643113341726, 'cat_depth': 3, 'w_lgbm': 0.18193735323860283, 'w_xgb': 0.7011557480076596, 'w_cat': 0.159909242118344}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000273 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:54,302] Trial 5 finished with value: 0.17827004501898244 and parameters: {'lgbm_n_estimators': 630, 'lgbm_learning_rate': 0.22583744457374125, 'lgbm_num_leaves': 47, 'xgb_n_estimators': 793, 'xgb_learning_rate': 0.1750154619134249, 'xgb_max_depth': 4, 'cat_iterations': 769, 'cat_learning_rate': 0.13601814426265946, 'cat_depth': 4, 'w_lgbm': 0.44169685738974185, 'w_xgb': 0.45828218189227066, 'w_cat': 0.548097244765323}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000961 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:56,088] Trial 6 finished with value: 0.1877913207971895 and parameters: {'lgbm_n_estimators': 555, 'lgbm_learning_rate': 0.2565455015102255, 'lgbm_num_leaves': 40, 'xgb_n_estimators': 333, 'xgb_learning_rate': 0.1138161553904411, 'xgb_max_depth': 3, 'cat_iterations': 785, 'cat_learning_rate': 0.28433894740716553, 'cat_depth': 4, 'w_lgbm': 0.04593095645530787, 'w_xgb': 0.45104729947956546, 'w_cat': 0.8765245850889359}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000255 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:46:57,760] Trial 7 finished with value: 0.1866283071516878 and parameters: {'lgbm_n_estimators': 264, 'lgbm_learning_rate': 0.28695130786625694, 'lgbm_num_leaves': 81, 'xgb_n_estimators': 140, 'xgb_learning_rate': 0.22541267180658858, 'xgb_max_depth': 4, 'cat_iterations': 298, 'cat_learning_rate': 0.1037197585169834, 'cat_depth': 8, 'w_lgbm': 0.3986437058972523, 'w_xgb': 0.8789557179549233, 'w_cat': 0.5774462289389597}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:03,943] Trial 8 finished with value: 0.17220353121492105 and parameters: {'lgbm_n_estimators': 352, 'lgbm_learning_rate': 0.1923173117248525, 'lgbm_num_leaves': 22, 'xgb_n_estimators': 189, 'xgb_learning_rate': 0.20080205721286007, 'xgb_max_depth': 6, 'cat_iterations': 843, 'cat_learning_rate': 0.1358528077433117, 'cat_depth': 9, 'w_lgbm': 0.3172800360809186, 'w_xgb': 0.40623066554568144, 'w_cat': 0.06631898441655082}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:05,115] Trial 9 finished with value: 0.18128803352614786 and parameters: {'lgbm_n_estimators': 383, 'lgbm_learning_rate': 0.143777142893304, 'lgbm_num_leaves': 67, 'xgb_n_estimators': 249, 'xgb_learning_rate': 0.14756851717541516, 'xgb_max_depth': 3, 'cat_iterations': 290, 'cat_learning_rate': 0.26417332344064814, 'cat_depth': 7, 'w_lgbm': 0.6020547633126032, 'w_xgb': 0.6431322498091621, 'w_cat': 0.2598525773614452}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000225 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:08,248] Trial 10 finished with value: 0.17185032341511122 and parameters: {'lgbm_n_estimators': 878, 'lgbm_learning_rate': 0.11641366926489234, 'lgbm_num_leaves': 63, 'xgb_n_estimators': 532, 'xgb_learning_rate': 0.2647706277727012, 'xgb_max_depth': 9, 'cat_iterations': 985, 'cat_learning_rate': 0.02291851450968732, 'cat_depth': 6, 'w_lgbm': 0.9913727266162676, 'w_xgb': 0.23010890232686443, 'w_cat': 0.36126999994039277}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000310 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:11,238] Trial 11 finished with value: 0.17046466519216352 and parameters: {'lgbm_n_estimators': 995, 'lgbm_learning_rate': 0.1218721246541949, 'lgbm_num_leaves': 65, 'xgb_n_estimators': 986, 'xgb_learning_rate': 0.28925535985500495, 'xgb_max_depth': 9, 'cat_iterations': 998, 'cat_learning_rate': 0.029324106318252937, 'cat_depth': 6, 'w_lgbm': 0.9902534856964011, 'w_xgb': 0.21963825350504546, 'w_cat': 0.3414804671546149}. Best is trial 2 with value: 0.17007482263214702.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000512 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:14,451] Trial 12 finished with value: 0.16455886157216 and parameters: {'lgbm_n_estimators': 990, 'lgbm_learning_rate': 0.08101153181344997, 'lgbm_num_leaves': 71, 'xgb_n_estimators': 947, 'xgb_learning_rate': 0.2967946615845637, 'xgb_max_depth': 9, 'cat_iterations': 939, 'cat_learning_rate': 0.2163117192791635, 'cat_depth': 6, 'w_lgbm': 0.7467709081107023, 'w_xgb': 0.2079018614249648, 'w_cat': 0.3667300976389392}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:16,171] Trial 13 finished with value: 0.17340885655342536 and parameters: {'lgbm_n_estimators': 468, 'lgbm_learning_rate': 0.07793983484387623, 'lgbm_num_leaves': 75, 'xgb_n_estimators': 982, 'xgb_learning_rate': 0.2472184849594894, 'xgb_max_depth': 8, 'cat_iterations': 541, 'cat_learning_rate': 0.21819766068599245, 'cat_depth': 5, 'w_lgbm': 0.7317146361865516, 'w_xgb': 0.6119540019936363, 'w_cat': 0.9922947954833796}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:17,772] Trial 14 finished with value: 0.17728331244731144 and parameters: {'lgbm_n_estimators': 503, 'lgbm_learning_rate': 0.17119566011712412, 'lgbm_num_leaves': 100, 'xgb_n_estimators': 829, 'xgb_learning_rate': 0.29466365664784555, 'xgb_max_depth': 7, 'cat_iterations': 562, 'cat_learning_rate': 0.19909668965527055, 'cat_depth': 3, 'w_lgbm': 0.7901644082347918, 'w_xgb': 0.3285866322847649, 'w_cat': 0.7195862765590266}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:20,853] Trial 15 finished with value: 0.17677018371918407 and parameters: {'lgbm_n_estimators': 109, 'lgbm_learning_rate': 0.07757107901800188, 'lgbm_num_leaves': 53, 'xgb_n_estimators': 570, 'xgb_learning_rate': 0.14566030517435766, 'xgb_max_depth': 10, 'cat_iterations': 857, 'cat_learning_rate': 0.2012853274610769, 'cat_depth': 7, 'w_lgbm': 0.6607275511999611, 'w_xgb': 0.11814636721916583, 'w_cat': 0.4504323069849169}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:23,116] Trial 16 finished with value: 0.16634161028545574 and parameters: {'lgbm_n_estimators': 879, 'lgbm_learning_rate': 0.073585931132794, 'lgbm_num_leaves': 55, 'xgb_n_estimators': 866, 'xgb_learning_rate': 0.1844722584977338, 'xgb_max_depth': 7, 'cat_iterations': 606, 'cat_learning_rate': 0.2448629769329473, 'cat_depth': 5, 'w_lgbm': 0.8747845927756022, 'w_xgb': 0.5340379750015346, 'w_cat': 0.44275141214170544}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:25,573] Trial 17 finished with value: 0.16629750488727946 and parameters: {'lgbm_n_estimators': 998, 'lgbm_learning_rate': 0.07076492601020695, 'lgbm_num_leaves': 74, 'xgb_n_estimators': 884, 'xgb_learning_rate': 0.11748435051462405, 'xgb_max_depth': 8, 'cat_iterations': 438, 'cat_learning_rate': 0.24501512402414596, 'cat_depth': 5, 'w_lgbm': 0.8634515572358589, 'w_xgb': 0.8601115300047747, 'w_cat': 0.43937451202428995}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000440 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:28,388] Trial 18 finished with value: 0.16468134826123354 and parameters: {'lgbm_n_estimators': 985, 'lgbm_learning_rate': 0.05370837153722118, 'lgbm_num_leaves': 74, 'xgb_n_estimators': 902, 'xgb_learning_rate': 0.11544167653075849, 'xgb_max_depth': 10, 'cat_iterations': 427, 'cat_learning_rate': 0.17653896946100422, 'cat_depth': 6, 'w_lgbm': 0.6009518505320565, 'w_xgb': 0.9958109315869169, 'w_cat': 0.1975944144976754}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000278 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:32,767] Trial 19 finished with value: 0.18078256965241776 and parameters: {'lgbm_n_estimators': 828, 'lgbm_learning_rate': 0.021654164560903563, 'lgbm_num_leaves': 92, 'xgb_n_estimators': 594, 'xgb_learning_rate': 0.010195226979013539, 'xgb_max_depth': 10, 'cat_iterations': 410, 'cat_learning_rate': 0.16410207521322484, 'cat_depth': 8, 'w_lgbm': 0.5752331603988661, 'w_xgb': 0.9511030236883864, 'w_cat': 0.011687602809781539}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000237 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:35,675] Trial 20 finished with value: 0.1672558440351445 and parameters: {'lgbm_n_estimators': 929, 'lgbm_learning_rate': 0.10505977126141772, 'lgbm_num_leaves': 72, 'xgb_n_estimators': 917, 'xgb_learning_rate': 0.0743519224809362, 'xgb_max_depth': 9, 'cat_iterations': 390, 'cat_learning_rate': 0.17440395829288405, 'cat_depth': 6, 'w_lgbm': 0.6158823888096812, 'w_xgb': 0.7916453157401165, 'w_cat': 0.18122254692487377}. Best is trial 12 with value: 0.16455886157216.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:38,723] Trial 21 finished with value: 0.16390035125311558 and parameters: {'lgbm_n_estimators': 974, 'lgbm_learning_rate': 0.04542139961778373, 'lgbm_num_leaves': 77, 'xgb_n_estimators': 908, 'xgb_learning_rate': 0.11250153791314059, 'xgb_max_depth': 8, 'cat_iterations': 456, 'cat_learning_rate': 0.24007233103204212, 'cat_depth': 7, 'w_lgbm': 0.7310911370169595, 'w_xgb': 0.9353396891727362, 'w_cat': 0.3262138624168582}. Best is trial 21 with value: 0.16390035125311558.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:41,904] Trial 22 finished with value: 0.1622533372016204 and parameters: {'lgbm_n_estimators': 830, 'lgbm_learning_rate': 0.047721032984230895, 'lgbm_num_leaves': 83, 'xgb_n_estimators': 781, 'xgb_learning_rate': 0.11166305217624475, 'xgb_max_depth': 10, 'cat_iterations': 485, 'cat_learning_rate': 0.23909658716900345, 'cat_depth': 7, 'w_lgbm': 0.7149454210147839, 'w_xgb': 0.9566407325221729, 'w_cat': 0.26210432885559576}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:45,990] Trial 23 finished with value: 0.1626875181178253 and parameters: {'lgbm_n_estimators': 801, 'lgbm_learning_rate': 0.04203952051068819, 'lgbm_num_leaves': 85, 'xgb_n_estimators': 778, 'xgb_learning_rate': 0.08954713542564355, 'xgb_max_depth': 8, 'cat_iterations': 501, 'cat_learning_rate': 0.2376703063940832, 'cat_depth': 8, 'w_lgbm': 0.7047659567914125, 'w_xgb': 0.7660004828563546, 'w_cat': 0.2935037952766044}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:51,479] Trial 24 finished with value: 0.17047386790726848 and parameters: {'lgbm_n_estimators': 813, 'lgbm_learning_rate': 0.011038818255643956, 'lgbm_num_leaves': 83, 'xgb_n_estimators': 748, 'xgb_learning_rate': 0.0759584640180512, 'xgb_max_depth': 8, 'cat_iterations': 506, 'cat_learning_rate': 0.24436667346720425, 'cat_depth': 8, 'w_lgbm': 0.7032156791390822, 'w_xgb': 0.7592571694962329, 'w_cat': 0.2711833212130427}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000763 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:47:55,451] Trial 25 finished with value: 0.1661638901456216 and parameters: {'lgbm_n_estimators': 634, 'lgbm_learning_rate': 0.03832179240075617, 'lgbm_num_leaves': 97, 'xgb_n_estimators': 656, 'xgb_learning_rate': 0.09580855499150226, 'xgb_max_depth': 8, 'cat_iterations': 639, 'cat_learning_rate': 0.2939901972327205, 'cat_depth': 7, 'w_lgbm': 0.5073294534059551, 'w_xgb': 0.9046647639000522, 'w_cat': 0.2892490830873253}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:00,725] Trial 26 finished with value: 0.17081280447230265 and parameters: {'lgbm_n_estimators': 784, 'lgbm_learning_rate': 0.05621703177475443, 'lgbm_num_leaves': 89, 'xgb_n_estimators': 482, 'xgb_learning_rate': 0.04376030182884183, 'xgb_max_depth': 6, 'cat_iterations': 496, 'cat_learning_rate': 0.26311052881185026, 'cat_depth': 9, 'w_lgbm': 0.8005394736339235, 'w_xgb': 0.8072180471892451, 'w_cat': 0.10983217705786597}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000266 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:04,624] Trial 27 finished with value: 0.16697386194447483 and parameters: {'lgbm_n_estimators': 891, 'lgbm_learning_rate': 0.10136021005276631, 'lgbm_num_leaves': 80, 'xgb_n_estimators': 796, 'xgb_learning_rate': 0.1624860334803061, 'xgb_max_depth': 9, 'cat_iterations': 354, 'cat_learning_rate': 0.23084367809010473, 'cat_depth': 8, 'w_lgbm': 0.6943301469335693, 'w_xgb': 0.994481253129995, 'w_cat': 0.2384032240960441}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:09,030] Trial 28 finished with value: 0.16349801094288596 and parameters: {'lgbm_n_estimators': 690, 'lgbm_learning_rate': 0.1425490022245976, 'lgbm_num_leaves': 87, 'xgb_n_estimators': 620, 'xgb_learning_rate': 0.13196736380634858, 'xgb_max_depth': 10, 'cat_iterations': 145, 'cat_learning_rate': 0.18825535928540424, 'cat_depth': 10, 'w_lgbm': 0.8765275108144573, 'w_xgb': 0.7182558203988967, 'w_cat': 0.32359873442061365}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:13,785] Trial 29 finished with value: 0.1647139933119262 and parameters: {'lgbm_n_estimators': 696, 'lgbm_learning_rate': 0.14358144121794603, 'lgbm_num_leaves': 95, 'xgb_n_estimators': 651, 'xgb_learning_rate': 0.13487148720461228, 'xgb_max_depth': 10, 'cat_iterations': 159, 'cat_learning_rate': 0.047856715234099165, 'cat_depth': 10, 'w_lgbm': 0.9041677743986815, 'w_xgb': 0.717092818982852, 'w_cat': 0.11293325296342482}. Best is trial 22 with value: 0.1622533372016204.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000337 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:19,820] Trial 30 finished with value: 0.16197866990920873 and parameters: {'lgbm_n_estimators': 599, 'lgbm_learning_rate': 0.1317051712612848, 'lgbm_num_leaves': 33, 'xgb_n_estimators': 625, 'xgb_learning_rate': 0.09336346269541453, 'xgb_max_depth': 10, 'cat_iterations': 223, 'cat_learning_rate': 0.18699961460014708, 'cat_depth': 10, 'w_lgbm': 0.9114665684434446, 'w_xgb': 0.5623311033977562, 'w_cat': 0.49163534416712273}. Best is trial 30 with value: 0.16197866990920873.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000133 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:23,944] Trial 31 finished with value: 0.16454954624634088 and parameters: {'lgbm_n_estimators': 596, 'lgbm_learning_rate': 0.13234056436090916, 'lgbm_num_leaves': 30, 'xgb_n_estimators': 644, 'xgb_learning_rate': 0.09518682600476674, 'xgb_max_depth': 10, 'cat_iterations': 121, 'cat_learning_rate': 0.19029094632533727, 'cat_depth': 10, 'w_lgbm': 0.914817587132867, 'w_xgb': 0.5643038678050779, 'w_cat': 0.41598807631005613}. Best is trial 30 with value: 0.16197866990920873.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000324 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:28,462] Trial 32 finished with value: 0.16295294391324486 and parameters: {'lgbm_n_estimators': 697, 'lgbm_learning_rate': 0.16844761973959033, 'lgbm_num_leaves': 31, 'xgb_n_estimators': 483, 'xgb_learning_rate': 0.05543104184303631, 'xgb_max_depth': 10, 'cat_iterations': 208, 'cat_learning_rate': 0.18384338813133194, 'cat_depth': 9, 'w_lgbm': 0.8169865823487958, 'w_xgb': 0.6231571496544558, 'w_cat': 0.4969648708351585}. Best is trial 30 with value: 0.16197866990920873.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000199 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:31,636] Trial 33 finished with value: 0.16451278637191583 and parameters: {'lgbm_n_estimators': 733, 'lgbm_learning_rate': 0.16809952823751612, 'lgbm_num_leaves': 31, 'xgb_n_estimators': 477, 'xgb_learning_rate': 0.05352277457628282, 'xgb_max_depth': 9, 'cat_iterations': 217, 'cat_learning_rate': 0.15866142934709834, 'cat_depth': 9, 'w_lgbm': 0.8155124095462206, 'w_xgb': 0.627696076631628, 'w_cat': 0.6869656747707782}. Best is trial 30 with value: 0.16197866990920873.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:37,203] Trial 34 finished with value: 0.16369919815082148 and parameters: {'lgbm_n_estimators': 645, 'lgbm_learning_rate': 0.195311807982634, 'lgbm_num_leaves': 30, 'xgb_n_estimators': 728, 'xgb_learning_rate': 0.013413857978328877, 'xgb_max_depth': 10, 'cat_iterations': 347, 'cat_learning_rate': 0.22351750631441356, 'cat_depth': 9, 'w_lgbm': 0.784947892003374, 'w_xgb': 0.8428265830970278, 'w_cat': 0.5421245783802509}. Best is trial 30 with value: 0.16197866990920873.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:41,874] Trial 35 finished with value: 0.1651996344348827 and parameters: {'lgbm_n_estimators': 806, 'lgbm_learning_rate': 0.09882355979894106, 'lgbm_num_leaves': 37, 'xgb_n_estimators': 525, 'xgb_learning_rate': 0.06385655739315985, 'xgb_max_depth': 9, 'cat_iterations': 213, 'cat_learning_rate': 0.2627194143495193, 'cat_depth': 8, 'w_lgbm': 0.6467281267057086, 'w_xgb': 0.5758380252109766, 'w_cat': 0.4969257874500862}. Best is trial 30 with value: 0.16197866990920873.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001070 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:48:49,216] Trial 36 finished with value: 0.16142461139637018 and parameters: {'lgbm_n_estimators': 550, 'lgbm_learning_rate': 0.2098244892959718, 'lgbm_num_leaves': 27, 'xgb_n_estimators': 380, 'xgb_learning_rate': 0.08973419198016061, 'xgb_max_depth': 10, 'cat_iterations': 668, 'cat_learning_rate': 0.14166557548608544, 'cat_depth': 9, 'w_lgbm': 0.9429745180492547, 'w_xgb': 0.6797375992592232, 'w_cat': 0.6355667238423112}. Best is trial 36 with value: 0.16142461139637018.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:49:03,054] Trial 37 finished with value: 0.16225911708715032 and parameters: {'lgbm_n_estimators': 539, 'lgbm_learning_rate': 0.2413284703341615, 'lgbm_num_leaves': 24, 'xgb_n_estimators': 388, 'xgb_learning_rate': 0.09064201589075786, 'xgb_max_depth': 7, 'cat_iterations': 681, 'cat_learning_rate': 0.12036165687167291, 'cat_depth': 10, 'w_lgbm': 0.9454693206935345, 'w_xgb': 0.673918337132728, 'w_cat': 0.6656514255304948}. Best is trial 36 with value: 0.16142461139637018.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000314 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:49:19,017] Trial 38 finished with value: 0.168836090726832 and parameters: {'lgbm_n_estimators': 439, 'lgbm_learning_rate': 0.25152484990845564, 'lgbm_num_leaves': 20, 'xgb_n_estimators': 369, 'xgb_learning_rate': 0.03190996182346083, 'xgb_max_depth': 6, 'cat_iterations': 701, 'cat_learning_rate': 0.11294807248621987, 'cat_depth': 10, 'w_lgbm': 0.9335700977298272, 'w_xgb': 0.4945912621331685, 'w_cat': 0.6527947813919575}. Best is trial 36 with value: 0.16142461139637018.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000419 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:49:34,926] Trial 39 finished with value: 0.169126494017005 and parameters: {'lgbm_n_estimators': 542, 'lgbm_learning_rate': 0.21261199845876855, 'lgbm_num_leaves': 26, 'xgb_n_estimators': 290, 'xgb_learning_rate': 0.08335327709303678, 'xgb_max_depth': 5, 'cat_iterations': 681, 'cat_learning_rate': 0.14451811119532396, 'cat_depth': 10, 'w_lgbm': 0.9543829430446502, 'w_xgb': 0.6721442106484711, 'w_cat': 0.7563170720028115}. Best is trial 36 with value: 0.16142461139637018.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:49:50,943] Trial 40 finished with value: 0.16252434218490142 and parameters: {'lgbm_n_estimators': 404, 'lgbm_learning_rate': 0.286075315822977, 'lgbm_num_leaves': 39, 'xgb_n_estimators': 424, 'xgb_learning_rate': 0.09727043851632813, 'xgb_max_depth': 7, 'cat_iterations': 743, 'cat_learning_rate': 0.11711095599520685, 'cat_depth': 10, 'w_lgbm': 0.9459965714958548, 'w_xgb': 0.4910058388710517, 'w_cat': 0.6164413436700849}. Best is trial 36 with value: 0.16142461139637018.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000320 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:05,922] Trial 41 finished with value: 0.16324141092053826 and parameters: {'lgbm_n_estimators': 391, 'lgbm_learning_rate': 0.2922366256957887, 'lgbm_num_leaves': 38, 'xgb_n_estimators': 412, 'xgb_learning_rate': 0.10314022316077875, 'xgb_max_depth': 7, 'cat_iterations': 753, 'cat_learning_rate': 0.07526736570860237, 'cat_depth': 10, 'w_lgbm': 0.9429660691828979, 'w_xgb': 0.4051361691364951, 'w_cat': 0.6093118042199362}. Best is trial 36 with value: 0.16142461139637018.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001268 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:11,320] Trial 42 finished with value: 0.16133743436598275 and parameters: {'lgbm_n_estimators': 321, 'lgbm_learning_rate': 0.27744952300667025, 'lgbm_num_leaves': 44, 'xgb_n_estimators': 432, 'xgb_learning_rate': 0.12823844520343505, 'xgb_max_depth': 7, 'cat_iterations': 637, 'cat_learning_rate': 0.11787603095258922, 'cat_depth': 9, 'w_lgbm': 0.8478228294981671, 'w_xgb': 0.49578678608239557, 'w_cat': 0.7792915519267711}. Best is trial 42 with value: 0.16133743436598275.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000236 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:16,239] Trial 43 finished with value: 0.16432426182054197 and parameters: {'lgbm_n_estimators': 239, 'lgbm_learning_rate': 0.27023808232735425, 'lgbm_num_leaves': 44, 'xgb_n_estimators': 344, 'xgb_learning_rate': 0.13030009786492971, 'xgb_max_depth': 6, 'cat_iterations': 612, 'cat_learning_rate': 0.08178712044599065, 'cat_depth': 9, 'w_lgbm': 0.8383104284566556, 'w_xgb': 0.3336183653925012, 'w_cat': 0.7703313179799323}. Best is trial 42 with value: 0.16133743436598275.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000271 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:22,085] Trial 44 finished with value: 0.16766997129949412 and parameters: {'lgbm_n_estimators': 315, 'lgbm_learning_rate': 0.24221397457157195, 'lgbm_num_leaves': 25, 'xgb_n_estimators': 247, 'xgb_learning_rate': 0.16040972004890758, 'xgb_max_depth': 4, 'cat_iterations': 819, 'cat_learning_rate': 0.1257005457531966, 'cat_depth': 9, 'w_lgbm': 0.9961011834319269, 'w_xgb': 0.4395813507834125, 'w_cat': 0.8557532329332042}. Best is trial 42 with value: 0.16133743436598275.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:27,115] Trial 45 finished with value: 0.1723779561074715 and parameters: {'lgbm_n_estimators': 564, 'lgbm_learning_rate': 0.2218400458804752, 'lgbm_num_leaves': 34, 'xgb_n_estimators': 445, 'xgb_learning_rate': 0.06883831609871983, 'xgb_max_depth': 5, 'cat_iterations': 656, 'cat_learning_rate': 0.09698122894780598, 'cat_depth': 9, 'w_lgbm': 0.08648792423374829, 'w_xgb': 0.5554476319636843, 'w_cat': 0.8170811018896383}. Best is trial 42 with value: 0.16133743436598275.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000295 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:38,308] Trial 46 finished with value: 0.16365202307079532 and parameters: {'lgbm_n_estimators': 318, 'lgbm_learning_rate': 0.26600591839581095, 'lgbm_num_leaves': 49, 'xgb_n_estimators': 298, 'xgb_learning_rate': 0.12378283152655445, 'xgb_max_depth': 9, 'cat_iterations': 574, 'cat_learning_rate': 0.143892553570986, 'cat_depth': 10, 'w_lgbm': 0.8795454668431354, 'w_xgb': 0.6599104176484427, 'w_cat': 0.9837825908382863}. Best is trial 42 with value: 0.16133743436598275.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000393 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:43,961] Trial 47 finished with value: 0.16166265339822497 and parameters: {'lgbm_n_estimators': 526, 'lgbm_learning_rate': 0.20443172528461218, 'lgbm_num_leaves': 26, 'xgb_n_estimators': 390, 'xgb_learning_rate': 0.10669879409449837, 'xgb_max_depth': 7, 'cat_iterations': 719, 'cat_learning_rate': 0.15048978961320378, 'cat_depth': 9, 'w_lgbm': 0.7578672603177449, 'w_xgb': 0.5880391855903121, 'w_cat': 0.9124905675860056}. Best is trial 42 with value: 0.16133743436598275.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:48,083] Trial 48 finished with value: 0.16130273001505413 and parameters: {'lgbm_n_estimators': 164, 'lgbm_learning_rate': 0.19895196162082854, 'lgbm_num_leaves': 45, 'xgb_n_estimators': 696, 'xgb_learning_rate': 0.14333552365175503, 'xgb_max_depth': 10, 'cat_iterations': 884, 'cat_learning_rate': 0.14468764366716041, 'cat_depth': 8, 'w_lgbm': 0.7505228671773148, 'w_xgb': 0.5192950338646558, 'w_cat': 0.8627459136063318}. Best is trial 48 with value: 0.16130273001505413.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1834
[LightGBM] [Info] Number of data points in the train set: 2400, number of used features: 31
[LightGBM] [Info] Start training from score 0.481912


[I 2025-07-23 22:50:52,426] Trial 49 finished with value: 0.16289459782513732 and parameters: {'lgbm_n_estimators': 179, 'lgbm_learning_rate': 0.2035460051951775, 'lgbm_num_leaves': 44, 'xgb_n_estimators': 559, 'xgb_learning_rate': 0.14548592763763513, 'xgb_max_depth': 9, 'cat_iterations': 909, 'cat_learning_rate': 0.15311852438967155, 'cat_depth': 8, 'w_lgbm': 0.44693216406077163, 'w_xgb': 0.5263054258640745, 'w_cat': 0.9159436171739392}. Best is trial 48 with value: 0.16130273001505413.


Best trial:
FrozenTrial(number=48, state=1, values=[0.16130273001505413], datetime_start=datetime.datetime(2025, 7, 23, 22, 50, 43, 963265), datetime_complete=datetime.datetime(2025, 7, 23, 22, 50, 48, 83603), params={'lgbm_n_estimators': 164, 'lgbm_learning_rate': 0.19895196162082854, 'lgbm_num_leaves': 45, 'xgb_n_estimators': 696, 'xgb_learning_rate': 0.14333552365175503, 'xgb_max_depth': 10, 'cat_iterations': 884, 'cat_learning_rate': 0.14468764366716041, 'cat_depth': 8, 'w_lgbm': 0.7505228671773148, 'w_xgb': 0.5192950338646558, 'w_cat': 0.8627459136063318}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lgbm_n_estimators': IntDistribution(high=1000, log=False, low=100, step=1), 'lgbm_learning_rate': FloatDistribution(high=0.3, log=False, low=0.01, step=None), 'lgbm_num_leaves': IntDistribution(high=100, log=False, low=20, step=1), 'xgb_n_estimators': IntDistribution(high=1000, log=False, low=100, step=1), 'xgb_learning_rate': FloatDistribution(high=0.3, log

In [None]:
# 1. 최적 파라미터 추출
best_params = study.best_trial.params

# 2. 모델 생성 (최적 파라미터 적용)
lgbm = LGBMRegressor(
    n_estimators=best_params['lgbm_n_estimators'],
    learning_rate=best_params['lgbm_learning_rate'],
    num_leaves=best_params['lgbm_num_leaves'],
    random_state=42
)
xgb = XGBRegressor(
    n_estimators=best_params['xgb_n_estimators'],
    learning_rate=best_params['xgb_learning_rate'],
    max_depth=best_params['xgb_max_depth'],
    random_state=42
)
cat = CatBoostRegressor(
    iterations=best_params['cat_iterations'],
    learning_rate=best_params['cat_learning_rate'],
    depth=best_params['cat_depth'],
    random_seed=42,
    verbose=0
)

# 3. 전체 train 데이터로 재학습
X_full = train.drop(['ID', 'stress_score'], axis=1, errors='ignore')
y_full = train['stress_score']

lgbm.fit(X_full, y_full)
xgb.fit(X_full, y_full)
cat.fit(X_full, y_full)

# 4. test 데이터 예측
pred_lgbm = lgbm.predict(test)
pred_xgb = xgb.predict(test)
pred_cat = cat.predict(test)


In [None]:
# 5. 앙상블 가중치 정규화
w_lgbm = best_params['w_lgbm']
w_xgb = best_params['w_xgb']
w_cat = best_params['w_cat']
total = w_lgbm + w_xgb + w_cat
w_lgbm /= total
w_xgb /= total
w_cat /= total

# 6. 앙상블 예측
pred_ensemble = w_lgbm * pred_lgbm + w_xgb * pred_xgb + w_cat * pred_cat

In [None]:
# 7. 제출 파일 생성
submission = pd.read_csv('./sample_submission.csv')
submission['stress_score'] = pred_ensemble
submission.to_csv('submit.csv', index=False)

In [None]:
# importances = lgbm.feature_importances_
# feature_names = x_train.columns

# importance_df = pd.DataFrame({
#     'Feature': feature_names,
#     'Importance': importances
# }).sort_values(by='Importance', ascending=False)

# plt.figure(figsize=(10, 6))
# sns.barplot(data=importance_df.head(20), x='Importance', y='Feature')
# plt.title('Top 20 Feature Importances (LGBM)')
# plt.show()

In [None]:
submission = pd.read_csv('./sample_submission.csv')

In [None]:
submission['stress_score'] = pred
submission.head()

In [None]:
submission.to_csv('submit.csv', index=False)

In [None]:
# train에서 나눠서 데이콘에서 측정하는 MAE와 다를수있음
# 성능이 올라갔는지 그대로인지 내려갔는지 경향만 파악할 수 있음
# 확인용으로만 쓸것

import datetime

# MAE 로그파일 세팅
def loging(MAE):
    with open('./mae_log', mode='a', encoding='utf-8') as f:
        current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        f.write(f"시간: {current_time} / MAE: {MAE}\n")

# MAE 체크용
train_set, val_set = train_test_split(train, test_size=0.2, random_state=42)

X_train = train_set.drop(['ID', 'stress_score'], axis=1)
y_train = train_set['stress_score']

X_val = val_set.drop(['ID', 'stress_score'], axis=1) 
y_val = val_set['stress_score']

lgbm = LGBMRegressor(random_state=42)
lgbm.fit(X_train, y_train)

val_pred = lgbm.predict(X_val)

MAE = mean_absolute_error(y_val, val_pred)

print(f"모델의 검증 MAE 점수: {MAE}")
loging(MAE)