In [10]:
# 필요한 라이브러리 로드
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder, StandardScaler
import optuna
from sklearn.model_selection import KFold

# 데이터 로드
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')
submission = pd.read_csv('data/sample_submission.csv')

# 배터리용량 결측치 대체 함수 정의
def fill_battery_capacity(df):
    # 결측치 행만 처리
    missing_indices = df[df['배터리용량'].isnull()].index

    # 차량상태가 Brand New인 경우
    brand_new_avg = df[df['차량상태'] == 'Brand New']['배터리용량'].mean()

    for idx in missing_indices:
        row = df.loc[idx]

        if row['차량상태'] == 'Brand New':
            # 1. Brand New: 차량 상태 평균으로 대체
            df.loc[idx, '배터리용량'] = brand_new_avg
        elif row['모델'] == 'IONIQ':
            # 3. 모델이 IONIQ: 같은 차량 상태 + 보증기간 평균으로 대체
            same_condition_warranty = df[(df['차량상태'] == row['차량상태']) & 
                                         (df['보증기간(년)'] == row['보증기간(년)'])]['배터리용량']
            if not same_condition_warranty.empty:
                df.loc[idx, '배터리용량'] = same_condition_warranty.mean()
        else:
            # 2. 그 외: 같은 차량 상태 + 모델 평균으로 대체
            same_condition_model = df[(df['차량상태'] == row['차량상태']) & 
                                       (df['모델'] == row['모델'])]['배터리용량']
            if not same_condition_model.empty:
                df.loc[idx, '배터리용량'] = same_condition_model.mean()

    return df

# 결측치 처리
train = fill_battery_capacity(train)
test = fill_battery_capacity(test)

# 결측치가 모두 처리되었는지 확인
print(f"Train 결측치: {train['배터리용량'].isnull().sum()}")
print(f"Test 결측치: {test['배터리용량'].isnull().sum()}")


# 사고이력과 차량상태를 더미 변수로 변환
train = pd.get_dummies(train, columns=['제조사', '모델', '구동방식'], drop_first=True)
test = pd.get_dummies(test, columns=['제조사', '모델', '구동방식'], drop_first=True)

# 차량상태 값을 순서를 고려해 직접 매핑
condition_mapping = {
    'Pre-Owned': 0,
    'Nearly New': 1,
    'Brand New': 2
}

# 차량상태 컬럼에 매핑 적용
train['차량상태'] = train['차량상태'].map(condition_mapping)
test['차량상태'] = test['차량상태'].map(condition_mapping)

# Label Encoding for '차량상태'
label_encoder = LabelEncoder()
train['사고이력'] = label_encoder.fit_transform(train['사고이력'])
test['사고이력'] = label_encoder.transform(test['사고이력'])

# 파생변수 생성
train['배터리효율'] = train['배터리용량'] / (train['주행거리(km)'] + 1)  # 주행거리가 0일 경우를 방지
test['배터리효율'] = test['배터리용량'] / (test['주행거리(km)'] + 1)

train['배터리용량_50미만'] = (train['배터리용량'] < 50)
test['배터리용량_50미만'] = (test['배터리용량'] >= 50)

train['연간주행거리'] = train['주행거리(km)'] / (train['연식(년)'] + 1)  # 연식이 0일 경우 방지
test['연간주행거리'] = test['주행거리(km)'] / (test['연식(년)'] + 1)

# # 수치형 변수
# continuous_vars = ['보증기간(년)', '연식(년)', '주행거리(km)', '배터리용량', '배터리효율']
# 
# 
# # Initialize the scaler
# scaler = StandardScaler()
# 
# # Fit and transform the scaler on train data, and transform the test data
# train[continuous_vars] = scaler.fit_transform(train[continuous_vars])
# test[continuous_vars] = scaler.transform(test[continuous_vars])

# 타깃 변수와 특성 분리
X = train.drop(columns=['ID', '가격(백만원)'])
y = train['가격(백만원)']
test_data = test.drop(columns=['ID'])

print("특성 개수", len(X.columns))
print("특성", X.info())

from statsmodels.stats.outliers_influence import variance_inflation_factor

def calculate_vif(df):
    """
    Calculate Variance Inflation Factor (VIF) for a DataFrame.

    Parameters:
        df (DataFrame): DataFrame containing numerical features for VIF calculation.

    Returns:
        DataFrame: VIF values for each feature.
    """
    # Ensure only numeric columns are used
    numeric_df = df.select_dtypes(include=['number'])

    # Drop rows with missing values to avoid errors
    numeric_df = numeric_df.dropna()

    vif_data = pd.DataFrame()
    vif_data["Feature"] = numeric_df.columns
    vif_data["VIF"] = [
        variance_inflation_factor(numeric_df.values, i) for i in range(numeric_df.shape[1])
    ]
    return vif_data

# Example usage:
# Assuming 'X' is your DataFrame with both numeric and non-numeric columns
vif_results = calculate_vif(X)
print(vif_results)

# X = X.drop(columns=['배터리용량'])
# test_data = test_data.drop(columns=['배터리용량'])
# 
# vif_results = calculate_vif(X)
# print(vif_results)
# 
# X = X.drop(columns=['보증기간(년)'])
# test_data = test_data.drop(columns=['보증기간(년)'])
# 
# vif_results = calculate_vif(X)
# print(vif_results)

# from sklearn.ensemble import RandomForestRegressor
# from sklearn.linear_model import ElasticNet
# from sklearn.model_selection import cross_val_score
# from xgboost import XGBRegressor
# from catboost import CatBoostRegressor
# 
# # 모델 리스트
# models = {
#     'Random Forest': RandomForestRegressor(random_state=42),
#     'ElasticNet': ElasticNet(random_state=42),
#     'XGBoost': XGBRegressor(random_state=42),
#     'CatBoost': CatBoostRegressor(verbose=0, random_state=42),
#     'LightGBM': lgb.LGBMRegressor(random_state=42)
# }
# 
# # 결과 저장
# cv_results = {}
# 
# # 5-Fold Cross-Validation 수행
# for model_name, model in models.items():
#     print(f"Training {model_name}...")
#     scores = cross_val_score(model, X, y, cv=5, scoring='neg_root_mean_squared_error')  # RMSE로 평가
#     cv_results[model_name] = -scores.mean()  # 평균 RMSE 저장
# 
# # 결과 출력
# for model_name, rmse in cv_results.items():
#     print(f"{model_name}: Mean RMSE = {rmse:.4f}")



# LightGBM, XGBoost, CatBoost Optuna 최적화
def objective(trial, model_type):
    if model_type == 'lightgbm':
        params = {
            'objective': 'regression',
            'metric': 'rmse',
            'learning_rate': trial.suggest_float('learning_rate', 0.0005, 0.1, log=True),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
            'max_depth': trial.suggest_int('max_depth', 3, 20),
            'random_state': 42
        }
        lgb_dataset = lgb.Dataset(X, y)
        cv_results = lgb.cv(
            params,
            lgb_dataset,
            num_boost_round=500,
            nfold=5,
            metrics='rmse',
            stratified=False,
            seed=42        )
        return cv_results['valid rmse-mean'][-1]


# 모델별 Optuna 최적화
lgb_study = optuna.create_study(direction='minimize')
lgb_study.optimize(lambda trial: objective(trial, 'lightgbm'), n_trials=30)
lgb_best_params = lgb_study.best_params
lgb_best_rmse = lgb_study.best_value


# 최적 모델 선택
best_model_type = min(
    [('lightgbm', lgb_best_rmse, lgb_best_params)],
    key=lambda x: x[1]
)

print(f"Best Model: {best_model_type[0]} with RMSE: {best_model_type[1]}")

# 최적 모델 학습 및 예측
if best_model_type[0] == 'lightgbm':
    final_model = lgb.train(
        {**best_model_type[2], 'objective': 'regression', 'metric': 'rmse'},
        lgb.Dataset(X, y),
        num_boost_round=500
    )
    final_pred = final_model.predict(test_data)


# 제출 파일 생성
submission['가격(백만원)'] = final_pred
submission.to_csv('submission.csv', index=False)
print("제출 파일이 'submission.csv'로 저장되었습니다.")

[I 2025-01-14 00:05:31,725] A new study created in memory with name: no-name-92b664bb-9166-42f4-bbef-2e9a6ddf1783


Train 결측치: 0
Test 결측치: 0
특성 개수 37
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7497 entries, 0 to 7496
Data columns (total 37 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   차량상태        7497 non-null   int64  
 1   배터리용량       7497 non-null   float64
 2   주행거리(km)    7497 non-null   int64  
 3   보증기간(년)     7497 non-null   int64  
 4   사고이력        7497 non-null   int32  
 5   연식(년)       7497 non-null   int64  
 6   제조사_B사      7497 non-null   bool   
 7   제조사_H사      7497 non-null   bool   
 8   제조사_K사      7497 non-null   bool   
 9   제조사_P사      7497 non-null   bool   
 10  제조사_T사      7497 non-null   bool   
 11  제조사_V사      7497 non-null   bool   
 12  모델_ID4      7497 non-null   bool   
 13  모델_ION5     7497 non-null   bool   
 14  모델_ION6     7497 non-null   bool   
 15  모델_IONIQ    7497 non-null   bool   
 16  모델_KNE      7497 non-null   bool   
 17  모델_M3       7497 non-null   bool   
 18  모델_MS       7497 non-null   bool  

[I 2025-01-14 00:05:37,941] Trial 0 finished with value: 1.370981872379497 and parameters: {'learning_rate': 0.014378034412456681, 'subsample': 0.6083351624278156, 'colsample_bytree': 0.7865602850060798, 'max_depth': 9}. Best is trial 0 with value: 1.370981872379497.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000864 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000772 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000690 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000847 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:05:44,301] Trial 1 finished with value: 1.4469945044164054 and parameters: {'learning_rate': 0.0999197440665143, 'subsample': 0.8690182940747624, 'colsample_bytree': 0.6930786383816108, 'max_depth': 19}. Best is trial 0 with value: 1.370981872379497.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000941 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000996 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000890 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000931 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:05:49,435] Trial 2 finished with value: 1.5667042039893526 and parameters: {'learning_rate': 0.009498902442369882, 'subsample': 0.6449133382783637, 'colsample_bytree': 0.6131708181809599, 'max_depth': 14}. Best is trial 0 with value: 1.370981872379497.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000873 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000982 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000763 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000902 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:05:53,438] Trial 3 finished with value: 1.3807588129766626 and parameters: {'learning_rate': 0.04111723778931302, 'subsample': 0.9026275300475112, 'colsample_bytree': 0.7134695687781419, 'max_depth': 5}. Best is trial 0 with value: 1.370981872379497.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000917 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000864 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001027 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000888 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:05:58,049] Trial 4 finished with value: 18.726986472982155 and parameters: {'learning_rate': 0.0014053281355578507, 'subsample': 0.6768130660759367, 'colsample_bytree': 0.6491705431005476, 'max_depth': 18}. Best is trial 0 with value: 1.370981872379497.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000723 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000878 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000671 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000725 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:06:02,870] Trial 5 finished with value: 25.067484707072804 and parameters: {'learning_rate': 0.0007757411624588375, 'subsample': 0.932830261644968, 'colsample_bytree': 0.7462136377983566, 'max_depth': 15}. Best is trial 0 with value: 1.370981872379497.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000563 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000824 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001050 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of t

[I 2025-01-14 00:06:07,971] Trial 6 finished with value: 1.3593963141118812 and parameters: {'learning_rate': 0.024204509549502175, 'subsample': 0.9207626812668382, 'colsample_bytree': 0.9110970035176497, 'max_depth': 16}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001127 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000884 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000912 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:06:13,140] Trial 7 finished with value: 1.5214938074544793 and parameters: {'learning_rate': 0.017318530439391022, 'subsample': 0.9016193873358322, 'colsample_bytree': 0.6660909849464816, 'max_depth': 6}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001003 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000970 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001022 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:06:35,286] Trial 8 finished with value: 1.363994588811226 and parameters: {'learning_rate': 0.03172435680448408, 'subsample': 0.9315495498373466, 'colsample_bytree': 0.842654719227759, 'max_depth': 12}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001546 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000936 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of t

[I 2025-01-14 00:07:02,299] Trial 9 finished with value: 1.3865789161149897 and parameters: {'learning_rate': 0.012399666063303106, 'subsample': 0.9896531469810408, 'colsample_bytree': 0.6944064805688435, 'max_depth': 15}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000946 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of t

[I 2025-01-14 00:07:22,476] Trial 10 finished with value: 11.202178024106328 and parameters: {'learning_rate': 0.0024023532867942757, 'subsample': 0.7692686930139006, 'colsample_bytree': 0.9702219174062474, 'max_depth': 20}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000777 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000853 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000932 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:07:27,654] Trial 11 finished with value: 1.3858778572437305 and parameters: {'learning_rate': 0.047117473598993154, 'subsample': 0.8152971454158278, 'colsample_bytree': 0.8724313823189602, 'max_depth': 11}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000972 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000806 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000862 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000795 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:07:32,802] Trial 12 finished with value: 5.925454462757149 and parameters: {'learning_rate': 0.003777654954354395, 'subsample': 0.982044597600772, 'colsample_bytree': 0.8973675310482578, 'max_depth': 12}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001007 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000893 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000765 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:07:37,527] Trial 13 finished with value: 1.3747979654925433 and parameters: {'learning_rate': 0.03786191343344706, 'subsample': 0.82307032147293, 'colsample_bytree': 0.865183758400081, 'max_depth': 9}. Best is trial 6 with value: 1.3593963141118812.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000891 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001034 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000962 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:07:41,692] Trial 14 finished with value: 1.3592337717735794 and parameters: {'learning_rate': 0.025514101246458063, 'subsample': 0.7393087546110306, 'colsample_bytree': 0.9825908647657893, 'max_depth': 17}. Best is trial 14 with value: 1.3592337717735794.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000970 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000688 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of t

[I 2025-01-14 00:07:47,396] Trial 15 finished with value: 4.704511886965214 and parameters: {'learning_rate': 0.004258314083917379, 'subsample': 0.7404078089095929, 'colsample_bytree': 0.9980158838658166, 'max_depth': 17}. Best is trial 14 with value: 1.3592337717735794.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000925 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000916 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000921 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000944 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:07:51,406] Trial 16 finished with value: 1.4348970666352148 and parameters: {'learning_rate': 0.09921149200916796, 'subsample': 0.7070013382315369, 'colsample_bytree': 0.9185379080012128, 'max_depth': 17}. Best is trial 14 with value: 1.3592337717735794.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000924 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000869 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000891 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000864 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:07:56,699] Trial 17 finished with value: 1.3515890892283875 and parameters: {'learning_rate': 0.022533653413725072, 'subsample': 0.8520320819698826, 'colsample_bytree': 0.9465967009961235, 'max_depth': 16}. Best is trial 17 with value: 1.3515890892283875.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000866 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000868 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000950 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000831 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:02,544] Trial 18 finished with value: 4.0626854212830334 and parameters: {'learning_rate': 0.00459855101855815, 'subsample': 0.7690473420428482, 'colsample_bytree': 0.9611079926161924, 'max_depth': 20}. Best is trial 17 with value: 1.3515890892283875.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001007 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000984 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:07,763] Trial 19 finished with value: 1.85353619080974 and parameters: {'learning_rate': 0.007030229859708872, 'subsample': 0.8463938201328335, 'colsample_bytree': 0.9489382941412058, 'max_depth': 14}. Best is trial 17 with value: 1.3515890892283875.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000916 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:11,827] Trial 20 finished with value: 1.3542282801812875 and parameters: {'learning_rate': 0.022746416683332767, 'subsample': 0.7233505582481016, 'colsample_bytree': 0.8185315936658923, 'max_depth': 18}. Best is trial 17 with value: 1.3515890892283875.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000891 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000824 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000878 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000802 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:17,115] Trial 21 finished with value: 1.3533523160234733 and parameters: {'learning_rate': 0.021319549184787172, 'subsample': 0.7203467321821924, 'colsample_bytree': 0.7866082386810667, 'max_depth': 18}. Best is trial 17 with value: 1.3515890892283875.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000623 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000900 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000883 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of t

[I 2025-01-14 00:08:20,995] Trial 22 finished with value: 1.396892129373279 and parameters: {'learning_rate': 0.05394099287096069, 'subsample': 0.6801569670214311, 'colsample_bytree': 0.8262951049726969, 'max_depth': 19}. Best is trial 17 with value: 1.3515890892283875.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000917 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000799 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000877 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000823 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:26,203] Trial 23 finished with value: 1.635293268046611 and parameters: {'learning_rate': 0.008053719422953487, 'subsample': 0.7760131837740979, 'colsample_bytree': 0.7953505999652075, 'max_depth': 19}. Best is trial 17 with value: 1.3515890892283875.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000982 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000987 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000850 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000901 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:30,675] Trial 24 finished with value: 1.3494457678915108 and parameters: {'learning_rate': 0.02042695277688345, 'subsample': 0.7253250300419642, 'colsample_bytree': 0.7539526476447496, 'max_depth': 14}. Best is trial 24 with value: 1.3494457678915108.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001012 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000991 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000991 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000859 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:35,624] Trial 25 finished with value: 1.4046410348284617 and parameters: {'learning_rate': 0.06514363625833744, 'subsample': 0.6860627772384973, 'colsample_bytree': 0.7470568592311374, 'max_depth': 13}. Best is trial 24 with value: 1.3494457678915108.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000893 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000910 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000971 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000932 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:39,484] Trial 26 finished with value: 1.3621993863485116 and parameters: {'learning_rate': 0.015846230780613476, 'subsample': 0.7882433310147715, 'colsample_bytree': 0.7616837193744312, 'max_depth': 10}. Best is trial 24 with value: 1.3494457678915108.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000927 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000889 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:45,441] Trial 27 finished with value: 1.4088667979672596 and parameters: {'learning_rate': 0.01058851794822927, 'subsample': 0.6393825410281365, 'colsample_bytree': 0.7718546368275806, 'max_depth': 15}. Best is trial 24 with value: 1.3494457678915108.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002315 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000975 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000832 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of t

[I 2025-01-14 00:08:49,137] Trial 28 finished with value: 1.3510167650274636 and parameters: {'learning_rate': 0.019434515766426734, 'subsample': 0.856850556835168, 'colsample_bytree': 0.7289689652714794, 'max_depth': 16}. Best is trial 24 with value: 1.3494457678915108.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001027 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000884 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000926 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 5996, number of used features: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000901 seconds.
You can set `force_col_wise=true` t

[I 2025-01-14 00:08:54,722] Trial 29 finished with value: 2.5048364613681016 and parameters: {'learning_rate': 0.006168374874746289, 'subsample': 0.8554363701487209, 'colsample_bytree': 0.7039761552363876, 'max_depth': 13}. Best is trial 24 with value: 1.3494457678915108.


Best Model: lightgbm with RMSE: 1.3494457678915108
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002734 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 7497, number of used features: 37
[LightGBM] [Info] Start training from score 62.331949
제출 파일이 'submission.csv'로 저장되었습니다.
