In [2]:
import pandas as pd
import lightgbm as lgb
from lightgbm import early_stopping
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split
import optuna

In [3]:
train = pd.read_csv("Data/filled_train.csv")
test = pd.read_csv("Data/filled_test.csv")
extra = pd.read_csv("Data/filled_extra.csv")
submission = pd.read_csv("Data/sample_submission.csv")

In [4]:
merge_train= pd.concat([train, extra])

In [5]:
merge_train.head()

Unnamed: 0,Brand,Material,Size,Compartments,Laptop Compartment,Waterproof,Style,Color,Weight Capacity (kg),id,Price
0,Jansport,Leather,Medium,7.0,Yes,No,Tote,Black,11.611723,0,112.15875
1,Jansport,Canvas,Small,10.0,Yes,Yes,Messenger,Green,27.078537,1,68.88056
2,Under Armour,Leather,Small,2.0,Yes,No,Messenger,Red,16.64376,2,39.1732
3,Nike,Nylon,Small,8.0,Yes,No,Messenger,Green,12.93722,3,80.60793
4,Adidas,Canvas,Medium,1.0,Yes,Yes,Messenger,Green,17.749338,4,86.02312


In [6]:
def analyze_data(dataframe, cat_th=10, car_th=20):
    """
    It gives the names of categorical, numerical and categorical but cardinal variables in the data set. It also performs incomplete data analysis.
    Parameters
    ------
        dataframe: dataframe
            The dataframe from which variable names are to be retrieved
        cat_th: int, optional
            Class threshold value for numeric but categorical variables
        car_th: int, optional
            Class threshold for categorical but cardinal variables

    Returns
    ------
        cat_cols: list
            Categorical variable list
        num_cols: list
            Numerik değişken listesi
        cat_but_car: list
            Categorical view cardinal variable list
    """
    cat_cols = [col for col in dataframe.columns if dataframe[col].dtype == "O"]
    num_cols = [col for col in dataframe.columns if dataframe[col].dtype != "O"]

    num_but_cat = [col for col in num_cols if dataframe[col].nunique() <= cat_th]
    cat_but_car = [col for col in cat_cols if dataframe[col].nunique() >= car_th]

    cat_cols = [col for col in cat_cols if col not in cat_but_car]
    num_cols = [col for col in num_cols if col not in num_but_cat]

    cat_cols = cat_cols + num_but_cat
    
    print(f"Number of Observations: {dataframe.shape[0]}")
    print(f"Number of Variables: {dataframe.shape[1]}")
    print(f'Cat cols: {len(cat_cols)}, Num cols: {len(num_cols)}, Cat but car cols: {len(cat_but_car)}')
    print("\nMissing Data")
    print(dataframe.isna().sum())

    return cat_cols, num_cols, cat_but_car

In [7]:
cat_cols, num_cols, cat_but_car = analyze_data(merge_train)

Number of Observations: 3994318
Number of Variables: 11
Cat cols: 8, Num cols: 3, Cat but car cols: 0

Missing Data
Brand                   0
Material                0
Size                    0
Compartments            0
Laptop Compartment      0
Waterproof              0
Style                   0
Color                   0
Weight Capacity (kg)    0
id                      0
Price                   0
dtype: int64


In [8]:
merge_train[cat_cols] = merge_train[cat_cols].astype("category")

In [9]:

X = merge_train.drop(columns=['Price',"id"])
y = merge_train['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### lgb

In [43]:
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'num_leaves': trial.suggest_int('num_leaves', 10, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
    }
    
    model = lgb.LGBMRegressor(**params)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric='rmse', callbacks=[early_stopping(50)])
    
    y_pred = model.predict(X_test)
    rmse = root_mean_squared_error(y_test, y_pred)
    return rmse

In [44]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, show_progress_bar=True)

[I 2025-02-18 15:59:30,962] A new study created in memory with name: no-name-0f4771a1-95f9-4ae1-92f5-6d670545aaf6


  0%|          | 0/50 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,031961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	valid_0's rmse: 38.8787
[I 2025-02-18 15:59:47,591] Trial 0 finished with value: 38.878655534605365 and parameters: {'learning_rate': 0.10968224281161448, 'n_estimators': 908, 'num_leaves': 74, 'max_depth': 11, 'min_child_samples': 7, 'subsample': 0.5531620292657644, 'colsample_bytree': 0.9969349572751252}. Best is trial 0 with value: 38.878655534605365.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,023844 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[336]	valid_0's rmse: 38.8761
[I 2025-02-18 16:00:17,314] Trial 1 finished with value: 38.87613898687228 and parameters: {'learning_rate': 0.08541216375664075, 'n_estimators': 735, 'num_leaves': 12, 'max_depth': 10, 'min_child_samples': 18, 'subsample': 0.9648793731854692, 'colsample_bytree': 0.8505861435025405}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,036771 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[239]	valid_0's rmse: 38.8778
[I 2025-02-18 16:01:12,334] Trial 2 finished with value: 38.87784718007837 and parameters: {'learning_rate': 0.02409586168133611, 'n_estimators': 250, 'num_leaves': 63, 'max_depth': 9, 'min_child_samples': 33, 'subsample': 0.6151952530431484, 'colsample_bytree': 0.8211148232453904}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,158454 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[57]	valid_0's rmse: 38.8774
[I 2025-02-18 16:01:28,100] Trial 3 finished with value: 38.87742830498959 and parameters: {'learning_rate': 0.08904442603308814, 'n_estimators': 462, 'num_leaves': 95, 'max_depth': 12, 'min_child_samples': 47, 'subsample': 0.9197578610834098, 'colsample_bytree': 0.6411655120093149}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,017543 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[105]	valid_0's rmse: 38.8775
[I 2025-02-18 16:01:58,301] Trial 4 finished with value: 38.877450187043806 and parameters: {'learning_rate': 0.0669443051792949, 'n_estimators': 105, 'num_leaves': 75, 'max_depth': 9, 'min_child_samples': 39, 'subsample': 0.6387218470690881, 'colsample_bytree': 0.6000423130346428}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,018120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[431]	valid_0's rmse: 38.8767
[I 2025-02-18 16:03:39,029] Trial 5 finished with value: 38.8766974539234 and parameters: {'learning_rate': 0.024172195203407073, 'n_estimators': 918, 'num_leaves': 49, 'max_depth': 9, 'min_child_samples': 17, 'subsample': 0.868966386886542, 'colsample_bytree': 0.5592804576593542}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,038275 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[182]	valid_0's rmse: 38.8762
[I 2025-02-18 16:04:12,269] Trial 6 finished with value: 38.87618906983608 and parameters: {'learning_rate': 0.08960349398762017, 'n_estimators': 774, 'num_leaves': 96, 'max_depth': 5, 'min_child_samples': 25, 'subsample': 0.9006295402076896, 'colsample_bytree': 0.8206273943262881}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,029136 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[86]	valid_0's rmse: 38.877
[I 2025-02-18 16:04:36,247] Trial 7 finished with value: 38.876975405761414 and parameters: {'learning_rate': 0.16965214665690356, 'n_estimators': 145, 'num_leaves': 90, 'max_depth': 5, 'min_child_samples': 23, 'subsample': 0.8964109531113169, 'colsample_bytree': 0.5260909787122088}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,036461 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[365]	valid_0's rmse: 38.8777
[I 2025-02-18 16:05:21,327] Trial 8 finished with value: 38.87769386398564 and parameters: {'learning_rate': 0.04931280867204777, 'n_estimators': 957, 'num_leaves': 36, 'max_depth': 15, 'min_child_samples': 5, 'subsample': 0.6302105261639388, 'colsample_bytree': 0.9479301940536375}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,021908 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[751]	valid_0's rmse: 38.8762
[I 2025-02-18 16:55:07,655] Trial 9 finished with value: 38.87621351667342 and parameters: {'learning_rate': 0.026369874519925614, 'n_estimators': 752, 'num_leaves': 17, 'max_depth': 6, 'min_child_samples': 38, 'subsample': 0.9130099097822805, 'colsample_bytree': 0.5376374212213233}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,320694 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[549]	valid_0's rmse: 38.8834
[I 2025-02-18 16:55:58,869] Trial 10 finished with value: 38.88341455595386 and parameters: {'learning_rate': 0.01194166363973372, 'n_estimators': 549, 'num_leaves': 12, 'max_depth': 3, 'min_child_samples': 15, 'subsample': 0.7507477766338441, 'colsample_bytree': 0.7104281502107863}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,042337 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[126]	valid_0's rmse: 38.8772
[I 2025-02-18 16:56:21,929] Trial 11 finished with value: 38.87718888314604 and parameters: {'learning_rate': 0.15291478778387727, 'n_estimators': 725, 'num_leaves': 33, 'max_depth': 6, 'min_child_samples': 26, 'subsample': 0.9783202280734937, 'colsample_bytree': 0.833055336082242}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,024187 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[219]	valid_0's rmse: 38.8774
[I 2025-02-18 16:56:58,451] Trial 12 finished with value: 38.877437870757404 and parameters: {'learning_rate': 0.06496951809260984, 'n_estimators': 743, 'num_leaves': 48, 'max_depth': 12, 'min_child_samples': 21, 'subsample': 0.7726899568839847, 'colsample_bytree': 0.8531509351588287}. Best is trial 1 with value: 38.87613898687228.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,024214 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[379]	valid_0's rmse: 38.8754
[I 2025-02-18 16:57:34,082] Trial 13 finished with value: 38.87542267455863 and parameters: {'learning_rate': 0.10926358692136093, 'n_estimators': 644, 'num_leaves': 28, 'max_depth': 3, 'min_child_samples': 13, 'subsample': 0.8251918886493066, 'colsample_bytree': 0.7452015308206249}. Best is trial 13 with value: 38.87542267455863.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,032901 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[365]	valid_0's rmse: 38.8745
[I 2025-02-18 16:58:08,069] Trial 14 finished with value: 38.87447268266082 and parameters: {'learning_rate': 0.1925289023722861, 'n_estimators': 419, 'num_leaves': 25, 'max_depth': 3, 'min_child_samples': 13, 'subsample': 0.8142010590833065, 'colsample_bytree': 0.7330211284041651}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,183426 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[386]	valid_0's rmse: 38.875
[I 2025-02-18 16:58:40,749] Trial 15 finished with value: 38.87502075556452 and parameters: {'learning_rate': 0.1810323491250319, 'n_estimators': 386, 'num_leaves': 28, 'max_depth': 3, 'min_child_samples': 12, 'subsample': 0.8166104702888243, 'colsample_bytree': 0.7205964098654537}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,023903 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[371]	valid_0's rmse: 38.8752
[I 2025-02-18 16:59:13,653] Trial 16 finished with value: 38.8752214371975 and parameters: {'learning_rate': 0.17958015580467143, 'n_estimators': 388, 'num_leaves': 25, 'max_depth': 3, 'min_child_samples': 10, 'subsample': 0.6907911994042073, 'colsample_bytree': 0.6672752815960774}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,024874 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[56]	valid_0's rmse: 38.8775
[I 2025-02-18 16:59:29,026] Trial 17 finished with value: 38.87749233242117 and parameters: {'learning_rate': 0.19165849880779257, 'n_estimators': 332, 'num_leaves': 41, 'max_depth': 7, 'min_child_samples': 11, 'subsample': 0.8077519294608609, 'colsample_bytree': 0.7590795810623303}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,020847 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[179]	valid_0's rmse: 38.8761
[I 2025-02-18 16:59:56,060] Trial 18 finished with value: 38.87606365609581 and parameters: {'learning_rate': 0.13212998731891676, 'n_estimators': 482, 'num_leaves': 22, 'max_depth': 4, 'min_child_samples': 31, 'subsample': 0.6865914889971277, 'colsample_bytree': 0.7043680813152278}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,022392 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[279]	valid_0's rmse: 38.8767
[I 2025-02-18 17:00:50,982] Trial 19 finished with value: 38.876717250186566 and parameters: {'learning_rate': 0.036140745878268106, 'n_estimators': 281, 'num_leaves': 59, 'max_depth': 7, 'min_child_samples': 19, 'subsample': 0.8301590305721946, 'colsample_bytree': 0.7761245693104508}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,037249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[426]	valid_0's rmse: 38.8784
[I 2025-02-18 17:02:22,651] Trial 20 finished with value: 38.87836804472059 and parameters: {'learning_rate': 0.011983527031332175, 'n_estimators': 426, 'num_leaves': 46, 'max_depth': 7, 'min_child_samples': 9, 'subsample': 0.7077455815037798, 'colsample_bytree': 0.9094958752711975}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,015946 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[344]	valid_0's rmse: 38.8756
[I 2025-02-18 17:02:52,578] Trial 21 finished with value: 38.87561658525131 and parameters: {'learning_rate': 0.18827132581767358, 'n_estimators': 355, 'num_leaves': 26, 'max_depth': 3, 'min_child_samples': 13, 'subsample': 0.706938597740991, 'colsample_bytree': 0.6476101469416353}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,029540 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[283]	valid_0's rmse: 38.8759
[I 2025-02-18 17:03:29,817] Trial 22 finished with value: 38.87589100134738 and parameters: {'learning_rate': 0.13269749829861702, 'n_estimators': 575, 'num_leaves': 21, 'max_depth': 4, 'min_child_samples': 9, 'subsample': 0.7855046185805334, 'colsample_bytree': 0.6741344184174509}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,023952 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[121]	valid_0's rmse: 38.8758
[I 2025-02-18 17:03:49,937] Trial 23 finished with value: 38.87582195403359 and parameters: {'learning_rate': 0.19948746313047597, 'n_estimators': 204, 'num_leaves': 34, 'max_depth': 4, 'min_child_samples': 6, 'subsample': 0.5066242864037064, 'colsample_bytree': 0.6146106239983986}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,023150 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[105]	valid_0's rmse: 38.8763
[I 2025-02-18 17:04:12,836] Trial 24 finished with value: 38.876326869502236 and parameters: {'learning_rate': 0.12787280680129492, 'n_estimators': 394, 'num_leaves': 28, 'max_depth': 5, 'min_child_samples': 15, 'subsample': 0.8525706140020778, 'colsample_bytree': 0.6968086369674531}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,038418 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[417]	valid_0's rmse: 38.8748
[I 2025-02-18 17:04:50,942] Trial 25 finished with value: 38.87484118726315 and parameters: {'learning_rate': 0.15221448120775483, 'n_estimators': 549, 'num_leaves': 39, 'max_depth': 3, 'min_child_samples': 11, 'subsample': 0.7339099344603235, 'colsample_bytree': 0.7378405087059359}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,038936 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[73]	valid_0's rmse: 38.8773
[I 2025-02-18 17:05:10,339] Trial 26 finished with value: 38.87727192117267 and parameters: {'learning_rate': 0.14388941864120888, 'n_estimators': 533, 'num_leaves': 41, 'max_depth': 6, 'min_child_samples': 21, 'subsample': 0.7405688327979195, 'colsample_bytree': 0.7741498136173982}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,032209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[274]	valid_0's rmse: 38.8759
[I 2025-02-18 17:05:45,115] Trial 27 finished with value: 38.875857046605525 and parameters: {'learning_rate': 0.11274061976911107, 'n_estimators': 670, 'num_leaves': 38, 'max_depth': 4, 'min_child_samples': 13, 'subsample': 0.7902947500994958, 'colsample_bytree': 0.7399428237495949}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,036620 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[169]	valid_0's rmse: 38.877
[I 2025-02-18 17:06:20,855] Trial 28 finished with value: 38.877014595194396 and parameters: {'learning_rate': 0.062355638032290196, 'n_estimators': 633, 'num_leaves': 54, 'max_depth': 14, 'min_child_samples': 31, 'subsample': 0.8549396921909754, 'colsample_bytree': 0.7911170003496663}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,031209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[474]	valid_0's rmse: 38.877
[I 2025-02-18 17:08:06,878] Trial 29 finished with value: 38.87701813473248 and parameters: {'learning_rate': 0.017428276972503612, 'n_estimators': 838, 'num_leaves': 68, 'max_depth': 8, 'min_child_samples': 5, 'subsample': 0.7443411769932451, 'colsample_bytree': 0.7231320293350479}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,032345 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[194]	valid_0's rmse: 38.8765
[I 2025-02-18 17:08:32,060] Trial 30 finished with value: 38.876460915460086 and parameters: {'learning_rate': 0.10239136614170835, 'n_estimators': 478, 'num_leaves': 17, 'max_depth': 5, 'min_child_samples': 8, 'subsample': 0.5889071333490683, 'colsample_bytree': 0.8849647038391386}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,027161 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[363]	valid_0's rmse: 38.8763
[I 2025-02-18 17:09:02,715] Trial 31 finished with value: 38.876299132109594 and parameters: {'learning_rate': 0.1627714257298075, 'n_estimators': 363, 'num_leaves': 30, 'max_depth': 3, 'min_child_samples': 10, 'subsample': 0.6721783779060274, 'colsample_bytree': 0.6760581885467285}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,019765 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[293]	valid_0's rmse: 38.8758
[I 2025-02-18 17:09:28,723] Trial 32 finished with value: 38.87578691521404 and parameters: {'learning_rate': 0.15999191295530146, 'n_estimators': 294, 'num_leaves': 21, 'max_depth': 3, 'min_child_samples': 15, 'subsample': 0.7238598926565694, 'colsample_bytree': 0.6583574775259694}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,186886 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[289]	valid_0's rmse: 38.8759
[I 2025-02-18 17:10:02,146] Trial 33 finished with value: 38.87591584606632 and parameters: {'learning_rate': 0.1201423017184231, 'n_estimators': 409, 'num_leaves': 15, 'max_depth': 4, 'min_child_samples': 11, 'subsample': 0.7675733977528683, 'colsample_bytree': 0.601374913820313}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,032228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[314]	valid_0's rmse: 38.8747
[I 2025-02-18 17:10:31,990] Trial 34 finished with value: 38.874749470163486 and parameters: {'learning_rate': 0.19942669907595495, 'n_estimators': 501, 'num_leaves': 25, 'max_depth': 3, 'min_child_samples': 18, 'subsample': 0.6548921823469471, 'colsample_bytree': 0.799493734535687}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,033356 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[139]	valid_0's rmse: 38.8772
[I 2025-02-18 17:10:49,348] Trial 35 finished with value: 38.87724160099251 and parameters: {'learning_rate': 0.1991246999310624, 'n_estimators': 518, 'num_leaves': 11, 'max_depth': 11, 'min_child_samples': 18, 'subsample': 0.6654812899381994, 'colsample_bytree': 0.8064009863451808}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,037987 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[275]	valid_0's rmse: 38.8759
[I 2025-02-18 17:11:25,143] Trial 36 finished with value: 38.875948190013915 and parameters: {'learning_rate': 0.09232474646505633, 'n_estimators': 450, 'num_leaves': 41, 'max_depth': 4, 'min_child_samples': 48, 'subsample': 0.589649630487497, 'colsample_bytree': 0.7315647974016393}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,025321 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[204]	valid_0's rmse: 38.8761
[I 2025-02-18 17:12:00,232] Trial 37 finished with value: 38.876101293081966 and parameters: {'learning_rate': 0.0775700261406833, 'n_estimators': 568, 'num_leaves': 32, 'max_depth': 5, 'min_child_samples': 20, 'subsample': 0.8121486128210449, 'colsample_bytree': 0.873350490077506}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,022483 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[376]	valid_0's rmse: 38.8753
[I 2025-02-18 17:12:37,278] Trial 38 finished with value: 38.87527097349361 and parameters: {'learning_rate': 0.14940873522896325, 'n_estimators': 596, 'num_leaves': 53, 'max_depth': 3, 'min_child_samples': 17, 'subsample': 0.9345954558634997, 'colsample_bytree': 0.7994845466174028}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,025878 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[188]	valid_0's rmse: 38.8767
[I 2025-02-18 17:13:17,396] Trial 39 finished with value: 38.87670241162065 and parameters: {'learning_rate': 0.05029875005692511, 'n_estimators': 192, 'num_leaves': 46, 'max_depth': 8, 'min_child_samples': 22, 'subsample': 0.6509221588536666, 'colsample_bytree': 0.6272623765782275}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,027350 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[274]	valid_0's rmse: 38.8763
[I 2025-02-18 17:14:02,876] Trial 40 finished with value: 38.87633359072831 and parameters: {'learning_rate': 0.07866917055686076, 'n_estimators': 504, 'num_leaves': 23, 'max_depth': 10, 'min_child_samples': 24, 'subsample': 0.8794015258568848, 'colsample_bytree': 0.5784511604636582}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,027555 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[316]	valid_0's rmse: 38.8761
[I 2025-02-18 17:14:29,536] Trial 41 finished with value: 38.876079731108725 and parameters: {'learning_rate': 0.17293806920324378, 'n_estimators': 320, 'num_leaves': 24, 'max_depth': 3, 'min_child_samples': 13, 'subsample': 0.6022980903051086, 'colsample_bytree': 0.6858066094425534}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,033484 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[132]	valid_0's rmse: 38.8766
[I 2025-02-18 17:14:49,684] Trial 42 finished with value: 38.87656048483196 and parameters: {'learning_rate': 0.16427417332775648, 'n_estimators': 394, 'num_leaves': 18, 'max_depth': 4, 'min_child_samples': 7, 'subsample': 0.6863763755491148, 'colsample_bytree': 0.7605940562356776}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,171225 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[445]	valid_0's rmse: 38.8757
[I 2025-02-18 17:15:27,448] Trial 43 finished with value: 38.87574904741174 and parameters: {'learning_rate': 0.13912520270964548, 'n_estimators': 454, 'num_leaves': 87, 'max_depth': 3, 'min_child_samples': 16, 'subsample': 0.6263647405166974, 'colsample_bytree': 0.722166007454407}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,015068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[105]	valid_0's rmse: 38.8761
[I 2025-02-18 17:15:51,333] Trial 44 finished with value: 38.87612432009412 and parameters: {'learning_rate': 0.17771041500591495, 'n_estimators': 248, 'num_leaves': 35, 'max_depth': 5, 'min_child_samples': 43, 'subsample': 0.5579342016749608, 'colsample_bytree': 0.6580564035737293}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,020313 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[121]	valid_0's rmse: 38.8769
[I 2025-02-18 17:16:14,912] Trial 45 finished with value: 38.87689400687631 and parameters: {'learning_rate': 0.1006824648323099, 'n_estimators': 372, 'num_leaves': 31, 'max_depth': 6, 'min_child_samples': 12, 'subsample': 0.7198527623532346, 'colsample_bytree': 0.8217101081252023}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,028014 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[318]	valid_0's rmse: 38.8755
[I 2025-02-18 17:16:53,888] Trial 46 finished with value: 38.87550817209208 and parameters: {'learning_rate': 0.12154526025815846, 'n_estimators': 425, 'num_leaves': 26, 'max_depth': 4, 'min_child_samples': 27, 'subsample': 0.7665116145685164, 'colsample_bytree': 0.7786856834650411}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,024402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[369]	valid_0's rmse: 38.8751
[I 2025-02-18 17:17:26,846] Trial 47 finished with value: 38.87508498554177 and parameters: {'learning_rate': 0.14642403145156857, 'n_estimators': 484, 'num_leaves': 10, 'max_depth': 3, 'min_child_samples': 9, 'subsample': 0.8004462635867754, 'colsample_bytree': 0.7446259948503923}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,031086 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[211]	valid_0's rmse: 38.876
[I 2025-02-18 17:17:47,323] Trial 48 finished with value: 38.87601893766653 and parameters: {'learning_rate': 0.14964567521691619, 'n_estimators': 600, 'num_leaves': 10, 'max_depth': 5, 'min_child_samples': 14, 'subsample': 0.7971535253337209, 'colsample_bytree': 0.8420105620356507}. Best is trial 14 with value: 38.87447268266082.


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,032622 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[501]	valid_0's rmse: 38.8782
[I 2025-02-18 17:18:28,684] Trial 49 finished with value: 38.87823797469156 and parameters: {'learning_rate': 0.03176500720276343, 'n_estimators': 501, 'num_leaves': 15, 'max_depth': 3, 'min_child_samples': 6, 'subsample': 0.8392026895254622, 'colsample_bytree': 0.9951300224676578}. Best is trial 14 with value: 38.87447268266082.


In [45]:
study.best_params

{'learning_rate': 0.1925289023722861,
 'n_estimators': 419,
 'num_leaves': 25,
 'max_depth': 3,
 'min_child_samples': 13,
 'subsample': 0.8142010590833065,
 'colsample_bytree': 0.7330211284041651}

In [46]:
study.best_value

38.87447268266082

In [47]:
final_model = lgb.train(study.best_params, lgb.Dataset(X_train, y_train))



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,039722 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 298
[LightGBM] [Info] Number of data points in the train set: 3195454, number of used features: 9
[LightGBM] [Info] Start training from score 81,361311


In [48]:
test.drop(["id", "Price"], axis=1, inplace=True)

In [49]:
test[cat_cols] = test[cat_cols].astype("category")

In [51]:
submission["Price"] = final_model.predict(test)

In [52]:
submission.to_csv("Data/lgb_submission.csv", index=False)