In [None]:
## using optuna gives the highest r2 score YETT using xgboost
import pandas as pd
import numpy as np
import joblib
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_path = "dataset_true_score.xlsx"
df = pd.read_excel(file_path)

features = ["cost","pop_density", "traffic_rte", "visibility", "avg_price_level", "Comp_Score"]
target = "true_score"

# Convert target to numeric & drop missing values
df[target] = pd.to_numeric(df[target], errors='coerce')
df = df.dropna()

# Define X (features) and y (target)
X = df[features]
y = df[target]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features using StandardScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define Optuna optimization function
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1.0)
    }
    
    model = XGBRegressor(**params, random_state=42, eval_metric="rmse",early_stopping_rounds=50)
    model.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)], verbose=False)
    
    y_pred = model.predict(X_test_scaled)
    return mean_squared_error(y_test, y_pred)

# Run Optuna optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Train final model with best parameters
best_params = study.best_params
xgb = XGBRegressor(**best_params, random_state=42, eval_metric="rmse", early_stopping_rounds=50)
xgb.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)], verbose=False)

# Save the trained model
joblib.dump(xgb, "xgb_model_optuna.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Optimized model and scaler saved.")

# Make predictions
y_pred = xgb.predict(X_test_scaled)

# Evaluate model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Display results
print("Optimized Mean Squared Error:", mse)
print("Optimized Root Mean Squared Error:", rmse)
print("Optimized R-squared Score:", r2)

# Display sample predictions
for actual, predicted in zip(y_test[:10], y_pred[:10]):
    print(f"Actual: {actual:.4f}, Predicted: {predicted:.4f}")

[I 2025-04-04 23:23:14,155] A new study created in memory with name: no-name-6590b368-aac8-49f1-9454-0e24c9367ef6
[I 2025-04-04 23:23:14,330] Trial 0 finished with value: 2.050125813231675 and parameters: {'n_estimators': 800, 'learning_rate': 0.19090690736953414, 'max_depth': 8, 'colsample_bytree': 0.5031462784473824, 'subsample': 0.6248418399951818, 'reg_alpha': 0.08511876126123563, 'reg_lambda': 0.7278681700507614}. Best is trial 0 with value: 2.050125813231675.
[I 2025-04-04 23:23:14,554] Trial 1 finished with value: 2.015820790859772 and parameters: {'n_estimators': 550, 'learning_rate': 0.11040696375878975, 'max_depth': 8, 'colsample_bytree': 0.8275229006849196, 'subsample': 0.8080147830793789, 'reg_alpha': 0.13431841455346616, 'reg_lambda': 0.980806493561907}. Best is trial 1 with value: 2.015820790859772.
[I 2025-04-04 23:23:14,967] Trial 2 finished with value: 2.0173399217079866 and parameters: {'n_estimators': 250, 'learning_rate': 0.08042620280159227, 'max_depth': 8, 'colsam

Optimized model and scaler saved.
Optimized Mean Squared Error: 1.984845144418669
Optimized Root Mean Squared Error: 1.4088453230992637
Optimized R-squared Score: 0.08329821935238524
Actual: 4.4782, Predicted: 2.5456
Actual: 3.9231, Predicted: 2.4743
Actual: 0.5914, Predicted: 2.9160
Actual: 0.9616, Predicted: 2.0083
Actual: 4.2000, Predicted: 3.2595
Actual: 4.1915, Predicted: 3.5868
Actual: 1.3910, Predicted: 3.4000
Actual: 3.4682, Predicted: 2.8803
Actual: 3.8512, Predicted: 2.8943
Actual: 3.4591, Predicted: 3.1604


In [9]:
#optuna lgbm
import pandas as pd
import numpy as np
import joblib
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_path = "dataset_true_score.xlsx"
df = pd.read_excel(file_path)

# Define features and target
features = ["cost", "pop_density", "traffic_rte", "visibility", "avg_price_level", "Comp_Score"]
target = "true_score"

# Convert target to numeric & drop missing values
df[target] = pd.to_numeric(df[target], errors='coerce')
df = df.dropna()

# Define X and y
X = df[features]
y = df[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define Optuna objective function
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "num_leaves": trial.suggest_int("num_leaves", 20, 150),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1.0)
    }
    
    model = LGBMRegressor(**params, random_state=42,early_stopping_rounds=50)
    model.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)],
                callbacks=[])
    
    y_pred = model.predict(X_test_scaled)
    return mean_squared_error(y_test, y_pred)

# Run Optuna
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Train final model with best parameters
best_params = study.best_params
lgbm = LGBMRegressor(**best_params, random_state=42,early_stopping_rounds=50)
lgbm.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)],
          callbacks=[])

# Save model and scaler
joblib.dump(lgbm, "lgbm_model_optuna.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Optimized LGBM model and scaler saved.")

# Predictions
y_pred = lgbm.predict(X_test_scaled)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Optimized Mean Squared Error:", mse)
print("Optimized Root Mean Squared Error:", rmse)
print("Optimized R-squared Score:", r2)

# Show sample predictions
for actual, predicted in zip(y_test[:10], y_pred[:10]):
    print(f"Actual: {actual:.4f}, Predicted: {predicted:.4f}")

[I 2025-04-04 22:56:27,685] A new study created in memory with name: no-name-125e00a1-5502-4639-b3d1-59e66c61070e


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000389 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[650]	valid_0's l2: 2.03859


[I 2025-04-04 22:56:27,919] Trial 0 finished with value: 2.0385900764337284 and parameters: {'n_estimators': 650, 'learning_rate': 0.011075095757220647, 'max_depth': 3, 'num_leaves': 137, 'subsample': 0.7987850926460395, 'colsample_bytree': 0.7360939486492064, 'reg_alpha': 0.5886237802425643, 'reg_lambda': 0.8828198687791134}. Best is trial 0 with value: 2.0385900764337284.
[I 2025-04-04 22:56:28,068] Trial 1 finished with value: 2.0269734972997826 and parameters: {'n_estimators': 950, 'learning_rate': 0.09685387626360314, 'max_depth': 4, 'num_leaves': 63, 'subsample': 0.8514004071419627, 'colsample_bytree': 0.9359736636607736, 'reg_alpha': 0.5353580951309607, 'reg_lambda': 0.440420108584167}. Best is trial 1 with value: 2.0269734972997826.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000264 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[275]	valid_0's l2: 2.02697
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000299 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:28,317] Trial 2 finished with value: 2.0176343225380617 and parameters: {'n_estimators': 1000, 'learning_rate': 0.049103571923716616, 'max_depth': 7, 'num_leaves': 50, 'subsample': 0.8842225951259082, 'colsample_bytree': 0.9726584952103241, 'reg_alpha': 0.15323783278740233, 'reg_lambda': 0.9980447572987893}. Best is trial 2 with value: 2.0176343225380617.


Early stopping, best iteration is:
[275]	valid_0's l2: 2.01763


[I 2025-04-04 22:56:28,419] Trial 3 finished with value: 2.0510720345700784 and parameters: {'n_estimators': 250, 'learning_rate': 0.011471500137765054, 'max_depth': 3, 'num_leaves': 41, 'subsample': 0.5357018271761027, 'colsample_bytree': 0.6722400033843712, 'reg_alpha': 0.04044084478123089, 'reg_lambda': 0.20607580664565928}. Best is trial 2 with value: 2.0176343225380617.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[250]	valid_0's l2: 2.05107
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000360 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:28,587] Trial 4 finished with value: 2.0151005968916444 and parameters: {'n_estimators': 500, 'learning_rate': 0.08467740605042859, 'max_depth': 7, 'num_leaves': 106, 'subsample': 0.6376246349793051, 'colsample_bytree': 0.758431838067803, 'reg_alpha': 0.4487562263280823, 'reg_lambda': 0.9001644026873024}. Best is trial 4 with value: 2.0151005968916444.
[I 2025-04-04 22:56:28,678] Trial 5 finished with value: 2.024549706823736 and parameters: {'n_estimators': 150, 'learning_rate': 0.06413161971862764, 'max_depth': 5, 'num_leaves': 37, 'subsample': 0.6214922005405257, 'colsample_bytree': 0.7453113431526981, 'reg_alpha': 0.6073784142967267, 'reg_lambda': 0.5459968955582478}. Best is trial 4 with value: 2.0151005968916444.


Early stopping, best iteration is:
[139]	valid_0's l2: 2.0151
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000446 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[129]	valid_0's l2: 2.02455
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000271 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:28,795] Trial 6 finished with value: 2.0173959091359692 and parameters: {'n_estimators': 200, 'learning_rate': 0.08078050013772044, 'max_depth': 10, 'num_leaves': 28, 'subsample': 0.6570760670394293, 'colsample_bytree': 0.7440175579933009, 'reg_alpha': 0.800877233905749, 'reg_lambda': 0.19011979766835396}. Best is trial 4 with value: 2.0151005968916444.
[I 2025-04-04 22:56:28,883] Trial 7 finished with value: 2.0499035771963614 and parameters: {'n_estimators': 800, 'learning_rate': 0.2828877630049861, 'max_depth': 10, 'num_leaves': 129, 'subsample': 0.7567144029301155, 'colsample_bytree': 0.7233393389149543, 'reg_alpha': 0.30431947008847793, 'reg_lambda': 0.11020830243279811}. Best is trial 4 with value: 2.0151005968916444.
[I 2025-04-04 22:56:28,937] Trial 8 finished with value: 2.0349224793855285 and parameters: {'n_estimators': 400, 'learning_rate': 0.2488043505569717, 'max_depth': 3, 'num_leaves': 36, 'subsample': 0.729726871776395, 'colsample_bytree': 0.9862949

Did not meet early stopping. Best iteration is:
[169]	valid_0's l2: 2.0174
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000466 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 2.0499
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[92]	valid_0's l2: 2.03492

[I 2025-04-04 22:56:29,219] Trial 9 finished with value: 2.016984777985325 and parameters: {'n_estimators': 200, 'learning_rate': 0.020712137161325982, 'max_depth': 8, 'num_leaves': 128, 'subsample': 0.8613057979270785, 'colsample_bytree': 0.6673164654306629, 'reg_alpha': 0.32238347753240104, 'reg_lambda': 0.02247382070212045}. Best is trial 4 with value: 2.0151005968916444.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000076 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:29,717] Trial 10 finished with value: 2.0064145787441316 and parameters: {'n_estimators': 500, 'learning_rate': 0.03483199400289878, 'max_depth': 12, 'num_leaves': 96, 'subsample': 0.5418596497172379, 'colsample_bytree': 0.5055206117023283, 'reg_alpha': 0.4212328940384855, 'reg_lambda': 0.678208880591152}. Best is trial 10 with value: 2.0064145787441316.


Early stopping, best iteration is:
[260]	valid_0's l2: 2.00641
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:30,110] Trial 11 finished with value: 2.0078350758631736 and parameters: {'n_estimators': 500, 'learning_rate': 0.031303735606437726, 'max_depth': 12, 'num_leaves': 97, 'subsample': 0.5298422824971428, 'colsample_bytree': 0.5107789322804378, 'reg_alpha': 0.36478777916138555, 'reg_lambda': 0.7074097174182167}. Best is trial 10 with value: 2.0064145787441316.


Early stopping, best iteration is:
[240]	valid_0's l2: 2.00784
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[242]	valid_0's l2: 2.00543


[I 2025-04-04 22:56:30,547] Trial 12 finished with value: 2.0054267845582534 and parameters: {'n_estimators': 600, 'learning_rate': 0.03176045183860877, 'max_depth': 12, 'num_leaves': 87, 'subsample': 0.5021080213829807, 'colsample_bytree': 0.500374547057042, 'reg_alpha': 0.34717806309303034, 'reg_lambda': 0.6358630508631314}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000297 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:30,912] Trial 13 finished with value: 2.0074599408149343 and parameters: {'n_estimators': 700, 'learning_rate': 0.027965967092988347, 'max_depth': 12, 'num_leaves': 76, 'subsample': 0.5042227054301411, 'colsample_bytree': 0.5041088671940982, 'reg_alpha': 0.18760029877536283, 'reg_lambda': 0.4643827033186221}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[260]	valid_0's l2: 2.00746
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000365 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:31,246] Trial 14 finished with value: 2.0146783707350306 and parameters: {'n_estimators': 400, 'learning_rate': 0.03913286118226867, 'max_depth': 10, 'num_leaves': 105, 'subsample': 0.591527422290397, 'colsample_bytree': 0.601313961745565, 'reg_alpha': 0.6537305166455692, 'reg_lambda': 0.6541229260027996}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[179]	valid_0's l2: 2.01468
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000430 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:31,847] Trial 15 finished with value: 2.006326973287091 and parameters: {'n_estimators': 650, 'learning_rate': 0.018836599728935772, 'max_depth': 11, 'num_leaves': 85, 'subsample': 0.9955365688543589, 'colsample_bytree': 0.5745765735690889, 'reg_alpha': 0.4406426917945271, 'reg_lambda': 0.34428668599031365}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[446]	valid_0's l2: 2.00633
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[560]	valid_0's l2: 2.0073


[I 2025-04-04 22:56:32,471] Trial 16 finished with value: 2.0073035780150996 and parameters: {'n_estimators': 800, 'learning_rate': 0.01898635885914381, 'max_depth': 9, 'num_leaves': 72, 'subsample': 0.9663449619944534, 'colsample_bytree': 0.579112094783234, 'reg_alpha': 0.7431058294907856, 'reg_lambda': 0.32703872719649657}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:33,089] Trial 17 finished with value: 2.0071794357072315 and parameters: {'n_estimators': 650, 'learning_rate': 0.01804018555427845, 'max_depth': 11, 'num_leaves': 85, 'subsample': 0.9894192755059676, 'colsample_bytree': 0.6041851909009721, 'reg_alpha': 0.22979748590597893, 'reg_lambda': 0.33639822842201195}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[456]	valid_0's l2: 2.00718
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000272 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:33,875] Trial 18 finished with value: 2.008651566631701 and parameters: {'n_estimators': 800, 'learning_rate': 0.015027368361546866, 'max_depth': 11, 'num_leaves': 150, 'subsample': 0.9295781265847769, 'colsample_bytree': 0.8686158432743547, 'reg_alpha': 0.4956805477107982, 'reg_lambda': 0.5607352331973787}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[400]	valid_0's l2: 2.00865
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000210 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:34,214] Trial 19 finished with value: 2.0133227915512752 and parameters: {'n_estimators': 350, 'learning_rate': 0.02563074380734901, 'max_depth': 8, 'num_leaves': 62, 'subsample': 0.7017797573673138, 'colsample_bytree': 0.5572478582196341, 'reg_alpha': 0.001742966155257819, 'reg_lambda': 0.3254129435973294}. Best is trial 12 with value: 2.0054267845582534.


Did not meet early stopping. Best iteration is:
[350]	valid_0's l2: 2.01332
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:34,333] Trial 20 finished with value: 2.02783697257151 and parameters: {'n_estimators': 700, 'learning_rate': 0.17909909624696246, 'max_depth': 6, 'num_leaves': 115, 'subsample': 0.802506182121645, 'colsample_bytree': 0.8324211329496034, 'reg_alpha': 0.1011003564427913, 'reg_lambda': 0.8099108112653006}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[90]	valid_0's l2: 2.02784
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000352 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:34,604] Trial 21 finished with value: 2.0084770197733026 and parameters: {'n_estimators': 600, 'learning_rate': 0.04085092517658909, 'max_depth': 12, 'num_leaves': 90, 'subsample': 0.5884131414215511, 'colsample_bytree': 0.5396337049521324, 'reg_alpha': 0.42037559494082577, 'reg_lambda': 0.5885195305497136}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[140]	valid_0's l2: 2.00848
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:34,917] Trial 22 finished with value: 2.0124885888396737 and parameters: {'n_estimators': 550, 'learning_rate': 0.03407008283475921, 'max_depth': 11, 'num_leaves': 82, 'subsample': 0.560418070371396, 'colsample_bytree': 0.6328216142069989, 'reg_alpha': 0.37463142495388474, 'reg_lambda': 0.41218878059372865}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[186]	valid_0's l2: 2.01249
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:35,196] Trial 23 finished with value: 2.0109773240762876 and parameters: {'n_estimators': 450, 'learning_rate': 0.05646693243652815, 'max_depth': 12, 'num_leaves': 95, 'subsample': 0.5006858891645395, 'colsample_bytree': 0.5365426834301302, 'reg_alpha': 0.4504119467529964, 'reg_lambda': 0.6668573672757302}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[129]	valid_0's l2: 2.01098
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:35,632] Trial 24 finished with value: 2.0107499478331077 and parameters: {'n_estimators': 550, 'learning_rate': 0.025658830868997163, 'max_depth': 9, 'num_leaves': 113, 'subsample': 0.677821019329789, 'colsample_bytree': 0.5123467606412773, 'reg_alpha': 0.2568472648285316, 'reg_lambda': 0.7824557093465583}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[272]	valid_0's l2: 2.01075
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000088 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:36,022] Trial 25 finished with value: 2.018566934981651 and parameters: {'n_estimators': 300, 'learning_rate': 0.013824319462940993, 'max_depth': 11, 'num_leaves': 67, 'subsample': 0.5766581039914741, 'colsample_bytree': 0.5731171588768081, 'reg_alpha': 0.5356076972045358, 'reg_lambda': 0.6124510627656023}. Best is trial 12 with value: 2.0054267845582534.
[I 2025-04-04 22:56:36,131] Trial 26 finished with value: 2.0114264610065034 and parameters: {'n_estimators': 900, 'learning_rate': 0.11995507971672838, 'max_depth': 9, 'num_leaves': 52, 'subsample': 0.9186376504354079, 'colsample_bytree': 0.64124677577021, 'reg_alpha': 0.298380789421575, 'reg_lambda': 0.49350486621889583}. Best is trial 12 with value: 2.0054267845582534.


Did not meet early stopping. Best iteration is:
[300]	valid_0's l2: 2.01857
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[52]	valid_0's l2: 2.01143
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[356]	valid_0's l2: 2.0

[I 2025-04-04 22:56:36,617] Trial 27 finished with value: 2.010364259314882 and parameters: {'n_estimators': 750, 'learning_rate': 0.02238923905149866, 'max_depth': 12, 'num_leaves': 79, 'subsample': 0.5455866774501688, 'colsample_bytree': 0.5482265263137934, 'reg_alpha': 0.41118094554143225, 'reg_lambda': 0.25025511831257324}. Best is trial 12 with value: 2.0054267845582534.
[I 2025-04-04 22:56:36,840] Trial 28 finished with value: 2.011816969030117 and parameters: {'n_estimators': 600, 'learning_rate': 0.045977263659565365, 'max_depth': 10, 'num_leaves': 98, 'subsample': 0.6231683720950663, 'colsample_bytree': 0.6124833868703713, 'reg_alpha': 0.7394921546246065, 'reg_lambda': 0.39478046538738876}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000288 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[108]	valid_0's l2: 2.01182
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000455 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:37,480] Trial 29 finished with value: 2.0072554610281057 and parameters: {'n_estimators': 650, 'learning_rate': 0.016235004850371697, 'max_depth': 11, 'num_leaves': 109, 'subsample': 0.7795037285828554, 'colsample_bytree': 0.6785694819251037, 'reg_alpha': 0.6473328100583946, 'reg_lambda': 0.845473397534207}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[382]	valid_0's l2: 2.00726
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001728 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[444]	valid_0's l2: 2.00869


[I 2025-04-04 22:56:38,306] Trial 30 finished with value: 2.0086856050807307 and parameters: {'n_estimators': 450, 'learning_rate': 0.010905597705370196, 'max_depth': 12, 'num_leaves': 122, 'subsample': 0.8074291084925125, 'colsample_bytree': 0.8016605161253203, 'reg_alpha': 0.5363189566137215, 'reg_lambda': 0.5362077641693078}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000556 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[496]	valid_0's l2: 2.00878


[I 2025-04-04 22:56:37,541] Trial 31 finished with value: 2.0087827165456633 and parameters: {'n_estimators': 650, 'learning_rate': 0.016869231803508977, 'max_depth': 11, 'num_leaves': 86, 'subsample': 0.9935355731473724, 'colsample_bytree': 0.5894999498132664, 'reg_alpha': 0.2238280398302358, 'reg_lambda': 0.3271850603557739}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:37,929] Trial 32 finished with value: 2.0094231658893524 and parameters: {'n_estimators': 600, 'learning_rate': 0.02172078737129527, 'max_depth': 11, 'num_leaves': 89, 'subsample': 0.9995827353905351, 'colsample_bytree': 0.5291792811339011, 'reg_alpha': 0.12916956588988765, 'reg_lambda': 0.3524294792988849}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[236]	valid_0's l2: 2.00942
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000262 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:38,251] Trial 33 finished with value: 2.0142669741498778 and parameters: {'n_estimators': 700, 'learning_rate': 0.035856056699228554, 'max_depth': 10, 'num_leaves': 71, 'subsample': 0.9509191242637417, 'colsample_bytree': 0.6170931746915064, 'reg_alpha': 0.27501454871713926, 'reg_lambda': 0.2327432147336309}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[217]	valid_0's l2: 2.01427
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[652]	valid_0's l2: 2.00937


[I 2025-04-04 22:56:39,007] Trial 34 finished with value: 2.0093667420336567 and parameters: {'n_estimators': 900, 'learning_rate': 0.013295369588525199, 'max_depth': 11, 'num_leaves': 62, 'subsample': 0.8938583395134451, 'colsample_bytree': 0.5710739412131796, 'reg_alpha': 0.3500275867673502, 'reg_lambda': 0.9342992615105215}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[550]	valid_0's l2: 2.01059


[I 2025-04-04 22:56:39,857] Trial 35 finished with value: 2.0105873916094437 and parameters: {'n_estimators': 550, 'learning_rate': 0.010096731934878039, 'max_depth': 12, 'num_leaves': 102, 'subsample': 0.8300909678974214, 'colsample_bytree': 0.6943579871350493, 'reg_alpha': 0.21817533486972762, 'reg_lambda': 0.13045093532364146}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000528 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:40,283] Trial 36 finished with value: 2.0098885841231393 and parameters: {'n_estimators': 500, 'learning_rate': 0.029216343306654874, 'max_depth': 9, 'num_leaves': 56, 'subsample': 0.9634812873511424, 'colsample_bytree': 0.5017430012116515, 'reg_alpha': 0.4832018797069287, 'reg_lambda': 0.45097277832720795}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[413]	valid_0's l2: 2.00989


[I 2025-04-04 22:56:40,504] Trial 37 finished with value: 2.019404198523904 and parameters: {'n_estimators': 750, 'learning_rate': 0.05816119665030009, 'max_depth': 6, 'num_leaves': 91, 'subsample': 0.9070373845813753, 'colsample_bytree': 0.9213990774459919, 'reg_alpha': 0.5799427024276103, 'reg_lambda': 0.2630144981328129}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000494 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[233]	valid_0's l2: 2.0194
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000375 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:40,836] Trial 38 finished with value: 2.010843529002983 and parameters: {'n_estimators': 600, 'learning_rate': 0.04776501028557303, 'max_depth': 10, 'num_leaves': 81, 'subsample': 0.8663148950619944, 'colsample_bytree': 0.6458955068809282, 'reg_alpha': 0.06686327831464767, 'reg_lambda': 0.7534325129288153}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[232]	valid_0's l2: 2.01084
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002005 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:41,099] Trial 39 finished with value: 2.012065551995234 and parameters: {'n_estimators': 450, 'learning_rate': 0.07615370417347848, 'max_depth': 11, 'num_leaves': 20, 'subsample': 0.9384835470683129, 'colsample_bytree': 0.5550689189093333, 'reg_alpha': 0.17025084390720294, 'reg_lambda': 0.637165624735394}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[328]	valid_0's l2: 2.01207
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000353 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[416]	valid_0's l2: 2.00622


[I 2025-04-04 22:56:41,839] Trial 40 finished with value: 2.006218589498921 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01792876654342798, 'max_depth': 12, 'num_leaves': 118, 'subsample': 0.525457428930657, 'colsample_bytree': 0.5941909721589683, 'reg_alpha': 0.38828614905220155, 'reg_lambda': 0.5007765860689464}. Best is trial 12 with value: 2.0054267845582534.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000290 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:42,486] Trial 41 finished with value: 2.0071297001541457 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01850608955945555, 'max_depth': 12, 'num_leaves': 119, 'subsample': 0.519925725726687, 'colsample_bytree': 0.5945587861444313, 'reg_alpha': 0.40242277466854187, 'reg_lambda': 0.5005793152127181}. Best is trial 12 with value: 2.0054267845582534.


Early stopping, best iteration is:
[345]	valid_0's l2: 2.00713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:43,069] Trial 42 finished with value: 2.005026757394453 and parameters: {'n_estimators': 1000, 'learning_rate': 0.02329425214091001, 'max_depth': 12, 'num_leaves': 141, 'subsample': 0.5303186466741321, 'colsample_bytree': 0.5262270900922627, 'reg_alpha': 0.40937681202899134, 'reg_lambda': 0.5079737682526126}. Best is trial 42 with value: 2.005026757394453.


Early stopping, best iteration is:
[240]	valid_0's l2: 2.00503
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:43,580] Trial 43 finished with value: 2.006943761529273 and parameters: {'n_estimators': 950, 'learning_rate': 0.026079207320274824, 'max_depth': 12, 'num_leaves': 140, 'subsample': 0.5609725177153304, 'colsample_bytree': 0.5241500571963505, 'reg_alpha': 0.3353971469268816, 'reg_lambda': 0.693067010260179}. Best is trial 42 with value: 2.005026757394453.


Early stopping, best iteration is:
[236]	valid_0's l2: 2.00694
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:44,182] Trial 44 finished with value: 2.005448062120103 and parameters: {'n_estimators': 900, 'learning_rate': 0.023092792246120514, 'max_depth': 12, 'num_leaves': 139, 'subsample': 0.531407441325756, 'colsample_bytree': 0.5577086216355112, 'reg_alpha': 0.4663776802085403, 'reg_lambda': 0.5364945049331682}. Best is trial 42 with value: 2.005026757394453.


Early stopping, best iteration is:
[275]	valid_0's l2: 2.00545
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003392 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:45,160] Trial 45 finished with value: 2.0012992721116314 and parameters: {'n_estimators': 950, 'learning_rate': 0.013198955023221636, 'max_depth': 12, 'num_leaves': 138, 'subsample': 0.6080272182484356, 'colsample_bytree': 0.5569550970650031, 'reg_alpha': 0.46727263299529515, 'reg_lambda': 0.5339152628251859}. Best is trial 45 with value: 2.0012992721116314.


Early stopping, best iteration is:
[473]	valid_0's l2: 2.0013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000387 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:45,576] Trial 46 finished with value: 2.0295916584687603 and parameters: {'n_estimators': 900, 'learning_rate': 0.013319529560606655, 'max_depth': 4, 'num_leaves': 140, 'subsample': 0.606719838090603, 'colsample_bytree': 0.7123084057476317, 'reg_alpha': 0.5868326912827161, 'reg_lambda': 0.5147352767205494}. Best is trial 45 with value: 2.0012992721116314.


Did not meet early stopping. Best iteration is:
[895]	valid_0's l2: 2.02959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:46,367] Trial 47 finished with value: 2.004572554350415 and parameters: {'n_estimators': 950, 'learning_rate': 0.022818714573273162, 'max_depth': 12, 'num_leaves': 150, 'subsample': 0.5167505019978782, 'colsample_bytree': 0.7704060106555088, 'reg_alpha': 0.4731544658173536, 'reg_lambda': 0.6167108087331317}. Best is trial 45 with value: 2.0012992721116314.


Early stopping, best iteration is:
[381]	valid_0's l2: 2.00457
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000378 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:47,255] Trial 48 finished with value: 2.006449448791908 and parameters: {'n_estimators': 950, 'learning_rate': 0.022323273176813305, 'max_depth': 12, 'num_leaves': 135, 'subsample': 0.6510973459584889, 'colsample_bytree': 0.764407929982302, 'reg_alpha': 0.48355172421930565, 'reg_lambda': 0.5846978980937738}. Best is trial 45 with value: 2.0012992721116314.


Early stopping, best iteration is:
[415]	valid_0's l2: 2.00645
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000349 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds


[I 2025-04-04 22:56:48,541] Trial 49 finished with value: 2.006564500890695 and parameters: {'n_estimators': 850, 'learning_rate': 0.011904989982508192, 'max_depth': 12, 'num_leaves': 147, 'subsample': 0.564522195369332, 'colsample_bytree': 0.8134952636925892, 'reg_alpha': 0.5359049456539752, 'reg_lambda': 0.6216043345049393}. Best is trial 45 with value: 2.0012992721116314.


Early stopping, best iteration is:
[578]	valid_0's l2: 2.00656
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000220 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1105
[LightGBM] [Info] Number of data points in the train set: 25027, number of used features: 6
[LightGBM] [Info] Start training from score 3.006600
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[473]	valid_0's l2: 2.0013
Optimized LGBM model and scaler saved.
Optimized Mean Squared Error: 2.0012992721116314
Optimized Root Mean Squared Error: 1.4146728498531493
Optimized R-squared Score: 0.07569887176723156
Actual: 4.4782, Predicted: 2.2106
Actual: 3.9231, Predicted: 2.3076
Actual: 0.5914, Predicted: 2.7858
Actual: 0.9616, Predicted: 2.1834
Actual: 4.2000, Predicted: 3.4621
Actual: 4.1915, Predicted: 3.4713
Actual: 1.3910, Predicted: 3.3649
Actual: 3.4682, Predicted: 2.9339
Actual: 3.85



In [2]:
## using optuna gives the highest r2 score YETT using xgboost
import pandas as pd
import numpy as np
import joblib
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_path = "augmented_dataset_true_score.xlsx"
df = pd.read_excel(file_path)


features = ["cost","pop_density", "traffic_rte", "visibility", "avg_price_level", "Comp_Score"]
target = "true_score"

# Convert target to numeric & drop missing values
df[target] = pd.to_numeric(df[target], errors='coerce')
df = df.dropna()

# Define X (features) and y (target)
X = df[features]
y = df[target]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features using StandardScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define Optuna optimization function
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1.0)
    }
    
    model = XGBRegressor(**params, random_state=42, eval_metric="rmse",early_stopping_rounds=50)
    model.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)], verbose=False)
    
    y_pred = model.predict(X_test_scaled)
    return mean_squared_error(y_test, y_pred)

# Run Optuna optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Train final model with best parameters
best_params = study.best_params
xgb = XGBRegressor(**best_params, random_state=42, eval_metric="rmse", early_stopping_rounds=50)
xgb.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)], verbose=False)

# Save the trained model
joblib.dump(xgb, "xgb_model_optuna.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Optimized model and scaler saved.")

# Make predictions
y_pred = xgb.predict(X_test_scaled)

# Evaluate model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Display results
print("Optimized Mean Squared Error:", mse)
print("Optimized Root Mean Squared Error:", rmse)
print("Optimized R-squared Score:", r2)

# Display sample predictions
for actual, predicted in zip(y_test[:10], y_pred[:10]):
    print(f"Actual: {actual:.4f}, Predicted: {predicted:.4f}")

[I 2025-04-04 22:40:36,970] A new study created in memory with name: no-name-8c89dedd-d44e-4b52-8bfd-9e7c188b47a1
[I 2025-04-04 22:40:37,865] Trial 0 finished with value: 1.445382394280455 and parameters: {'n_estimators': 600, 'learning_rate': 0.017989490012315052, 'max_depth': 6, 'colsample_bytree': 0.717263785503407, 'subsample': 0.5463071234674459, 'reg_alpha': 0.7204837011089145, 'reg_lambda': 0.5152355056769679}. Best is trial 0 with value: 1.445382394280455.
[I 2025-04-04 22:40:38,115] Trial 1 finished with value: 1.422107008780122 and parameters: {'n_estimators': 800, 'learning_rate': 0.013801271735133546, 'max_depth': 7, 'colsample_bytree': 0.9592784775198833, 'subsample': 0.5185873088667242, 'reg_alpha': 0.42498427954197227, 'reg_lambda': 0.05889666983861619}. Best is trial 1 with value: 1.422107008780122.
[I 2025-04-04 22:40:38,899] Trial 2 finished with value: 1.4096344562572305 and parameters: {'n_estimators': 500, 'learning_rate': 0.056514996289704425, 'max_depth': 6, 'col

Optimized model and scaler saved.
Optimized Mean Squared Error: 1.3651221858185985
Optimized Root Mean Squared Error: 1.16838443408777
Optimized R-squared Score: 0.37754879529145613
Actual: 4.2940, Predicted: 3.0451
Actual: 2.3458, Predicted: 2.3917
Actual: 1.7798, Predicted: 1.6675
Actual: 0.6163, Predicted: 1.2621
Actual: 3.9000, Predicted: 3.6966
Actual: 3.2552, Predicted: 2.2994
Actual: 4.1000, Predicted: 3.5712
Actual: 1.2580, Predicted: 1.7010
Actual: 1.7330, Predicted: 1.3728
Actual: 4.6000, Predicted: 3.8116


In [2]:
import pandas as pd
import numpy as np
import joblib
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_path = "augmented_dataset_true_score.xlsx"
df = pd.read_excel(file_path)

# Define features and target
features = ["cost", "pop_density", "traffic_rte", "visibility", "avg_price_level", "Comp_Score"]
target = "true_score"

# Convert target to numeric & drop missing values
df[target] = pd.to_numeric(df[target], errors='coerce')
df = df.dropna()

# Define X and y
X = df[features]
y = df[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define Optuna objective function
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=50),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "num_leaves": trial.suggest_int("num_leaves", 20, 150),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1.0)
    }
    
    model = LGBMRegressor(**params, random_state=42,early_stopping_rounds=50)
    model.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)],
                callbacks=[])
    
    y_pred = model.predict(X_test_scaled)
    return mean_squared_error(y_test, y_pred)

# Run Optuna
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Train final model with best parameters
best_params = study.best_params
lgbm = LGBMRegressor(**best_params, random_state=42,early_stopping_rounds=50)
lgbm.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)],
          callbacks=[])

# Save model and scaler
joblib.dump(lgbm, "lgbm_model_optuna.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Optimized LGBM model and scaler saved.")

# Predictions
y_pred = lgbm.predict(X_test_scaled)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Optimized Mean Squared Error:", mse)
print("Optimized Root Mean Squared Error:", rmse)
print("Optimized R-squared Score:", r2)

# Show sample predictions
for actual, predicted in zip(y_test[:10], y_pred[:10]):
    print(f"Actual: {actual:.4f}, Predicted: {predicted:.4f}")


[I 2025-04-06 17:23:16,287] A new study created in memory with name: no-name-9cd54aae-47ba-4097-8036-fd89286ee5d5


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000916 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[649]	valid_0's l2: 1.38401


[I 2025-04-06 17:23:17,446] Trial 0 finished with value: 1.3840140616458776 and parameters: {'n_estimators': 650, 'learning_rate': 0.019023552427657093, 'max_depth': 11, 'num_leaves': 81, 'subsample': 0.5967037290640417, 'colsample_bytree': 0.5710500912596709, 'reg_alpha': 0.6264289376493425, 'reg_lambda': 0.9796580142923667}. Best is trial 0 with value: 1.3840140616458776.
[I 2025-04-06 17:23:17,606] Trial 1 finished with value: 1.4628412913696127 and parameters: {'n_estimators': 300, 'learning_rate': 0.05152790937043831, 'max_depth': 3, 'num_leaves': 25, 'subsample': 0.9050731194944805, 'colsample_bytree': 0.6208541780461934, 'reg_alpha': 0.8500659974387313, 'reg_lambda': 0.6024168155193076}. Best is trial 0 with value: 1.3840140616458776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000782 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[300]	valid_0's l2: 1.46284
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[200]	valid_0's l2: 1.4883


[I 2025-04-06 17:23:17,860] Trial 2 finished with value: 1.4882978694546511 and parameters: {'n_estimators': 200, 'learning_rate': 0.017038734706100456, 'max_depth': 6, 'num_leaves': 120, 'subsample': 0.9028664498115972, 'colsample_bytree': 0.9898743905953404, 'reg_alpha': 0.46162600607537196, 'reg_lambda': 0.07102271600437171}. Best is trial 0 with value: 1.3840140616458776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000480 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:18,156] Trial 3 finished with value: 1.4042336200936556 and parameters: {'n_estimators': 750, 'learning_rate': 0.2811636124841898, 'max_depth': 10, 'num_leaves': 148, 'subsample': 0.7622645661970406, 'colsample_bytree': 0.6972270519114259, 'reg_alpha': 0.9848398563328511, 'reg_lambda': 0.30448478531132317}. Best is trial 0 with value: 1.3840140616458776.


Early stopping, best iteration is:
[89]	valid_0's l2: 1.40423
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000465 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:18,573] Trial 4 finished with value: 1.4633052802563122 and parameters: {'n_estimators': 250, 'learning_rate': 0.01565336386031892, 'max_depth': 7, 'num_leaves': 86, 'subsample': 0.5537462179389462, 'colsample_bytree': 0.9615592138008981, 'reg_alpha': 0.31832080431718, 'reg_lambda': 0.016067565292695818}. Best is trial 0 with value: 1.3840140616458776.


Did not meet early stopping. Best iteration is:
[250]	valid_0's l2: 1.46331
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000613 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:18,786] Trial 5 finished with value: 1.4016603369565892 and parameters: {'n_estimators': 600, 'learning_rate': 0.293001474495601, 'max_depth': 10, 'num_leaves': 91, 'subsample': 0.9552129860271077, 'colsample_bytree': 0.8205196040034322, 'reg_alpha': 0.002926240538115743, 'reg_lambda': 0.19470786345520252}. Best is trial 0 with value: 1.3840140616458776.


Early stopping, best iteration is:
[86]	valid_0's l2: 1.40166
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000604 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[200]	valid_0's l2: 1.42657


[I 2025-04-06 17:23:18,899] Trial 6 finished with value: 1.4265664798797562 and parameters: {'n_estimators': 200, 'learning_rate': 0.17852843937701057, 'max_depth': 3, 'num_leaves': 142, 'subsample': 0.5947870310000986, 'colsample_bytree': 0.622016518966197, 'reg_alpha': 0.7797674713572007, 'reg_lambda': 0.956466250222142}. Best is trial 0 with value: 1.3840140616458776.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001059 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:19,608] Trial 7 finished with value: 1.368663239007376 and parameters: {'n_estimators': 1000, 'learning_rate': 0.08237339279191669, 'max_depth': 12, 'num_leaves': 75, 'subsample': 0.5069581968954027, 'colsample_bytree': 0.6626145106115888, 'reg_alpha': 0.7136654287018969, 'reg_lambda': 0.5948575862663452}. Best is trial 7 with value: 1.368663239007376.


Early stopping, best iteration is:
[472]	valid_0's l2: 1.36866
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000708 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:19,893] Trial 8 finished with value: 1.4046481200808856 and parameters: {'n_estimators': 550, 'learning_rate': 0.1745011736163676, 'max_depth': 3, 'num_leaves': 115, 'subsample': 0.9891083069342415, 'colsample_bytree': 0.7884433477610655, 'reg_alpha': 0.4539917199800988, 'reg_lambda': 0.34956587392589034}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[547]	valid_0's l2: 1.40465
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001032 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:20,146] Trial 9 finished with value: 1.4287504387429497 and parameters: {'n_estimators': 150, 'learning_rate': 0.02659436437477797, 'max_depth': 12, 'num_leaves': 67, 'subsample': 0.5184991745776337, 'colsample_bytree': 0.631816262270793, 'reg_alpha': 0.9176432034658865, 'reg_lambda': 0.28673840193447964}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[150]	valid_0's l2: 1.42875
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000477 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:20,867] Trial 10 finished with value: 1.3741226754134233 and parameters: {'n_estimators': 1000, 'learning_rate': 0.07770686259721303, 'max_depth': 8, 'num_leaves': 28, 'subsample': 0.7149004508853349, 'colsample_bytree': 0.8701618708815088, 'reg_alpha': 0.6689917937227278, 'reg_lambda': 0.651628108872521}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[968]	valid_0's l2: 1.37412
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000501 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:21,515] Trial 11 finished with value: 1.370526975231013 and parameters: {'n_estimators': 950, 'learning_rate': 0.08102832246861631, 'max_depth': 9, 'num_leaves': 22, 'subsample': 0.7056185511058053, 'colsample_bytree': 0.8661350882706862, 'reg_alpha': 0.74130182974808, 'reg_lambda': 0.6566071312250857}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[948]	valid_0's l2: 1.37053
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000524 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:22,259] Trial 12 finished with value: 1.3716298683322805 and parameters: {'n_estimators': 1000, 'learning_rate': 0.0724845895067064, 'max_depth': 9, 'num_leaves': 49, 'subsample': 0.6980061332047484, 'colsample_bytree': 0.7161361397918997, 'reg_alpha': 0.7170198324619831, 'reg_lambda': 0.7620014845616017}. Best is trial 7 with value: 1.368663239007376.


Early stopping, best iteration is:
[639]	valid_0's l2: 1.37163
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000470 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:23,291] Trial 13 finished with value: 1.3704248476269933 and parameters: {'n_estimators': 850, 'learning_rate': 0.04394720885017273, 'max_depth': 12, 'num_leaves': 55, 'subsample': 0.791912419574804, 'colsample_bytree': 0.5196071935964528, 'reg_alpha': 0.5506958423988962, 'reg_lambda': 0.5397141632979832}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[849]	valid_0's l2: 1.37042
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000423 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:24,260] Trial 14 finished with value: 1.3695961933938159 and parameters: {'n_estimators': 850, 'learning_rate': 0.03807235496800617, 'max_depth': 12, 'num_leaves': 53, 'subsample': 0.8076474557052855, 'colsample_bytree': 0.5192418713635214, 'reg_alpha': 0.27325102192576567, 'reg_lambda': 0.4444577534786722}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[849]	valid_0's l2: 1.3696
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000181 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:25,362] Trial 15 finished with value: 1.3794064605055665 and parameters: {'n_estimators': 850, 'learning_rate': 0.030989019643675595, 'max_depth': 12, 'num_leaves': 45, 'subsample': 0.8270302721478365, 'colsample_bytree': 0.5053883387688619, 'reg_alpha': 0.17456268883163958, 'reg_lambda': 0.425632983341393}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[849]	valid_0's l2: 1.37941
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000427 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:25,711] Trial 16 finished with value: 1.3931959521035404 and parameters: {'n_estimators': 400, 'learning_rate': 0.12114585734170322, 'max_depth': 5, 'num_leaves': 69, 'subsample': 0.6489049962612651, 'colsample_bytree': 0.5494707906592271, 'reg_alpha': 0.2988421668185343, 'reg_lambda': 0.7938589109711078}. Best is trial 7 with value: 1.368663239007376.


Did not meet early stopping. Best iteration is:
[400]	valid_0's l2: 1.3932
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[818]	valid_0's l2: 1.36204


[I 2025-04-06 17:23:27,178] Trial 17 finished with value: 1.362037454135999 and parameters: {'n_estimators': 850, 'learning_rate': 0.03470804133374062, 'max_depth': 11, 'num_leaves': 106, 'subsample': 0.8427052998862476, 'colsample_bytree': 0.6676623765076025, 'reg_alpha': 0.29852626373988334, 'reg_lambda': 0.4493528989396546}. Best is trial 17 with value: 1.362037454135999.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000472 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[700]	valid_0's l2: 1.39731


[I 2025-04-06 17:23:28,676] Trial 18 finished with value: 1.3973119013062576 and parameters: {'n_estimators': 700, 'learning_rate': 0.01022615540406613, 'max_depth': 10, 'num_leaves': 105, 'subsample': 0.8561902280022555, 'colsample_bytree': 0.677709548683746, 'reg_alpha': 0.0747899774029121, 'reg_lambda': 0.49525198180385666}. Best is trial 17 with value: 1.362037454135999.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000745 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:29,525] Trial 19 finished with value: 1.3693926204065983 and parameters: {'n_estimators': 900, 'learning_rate': 0.11077884722645474, 'max_depth': 11, 'num_leaves': 131, 'subsample': 0.6370947829946713, 'colsample_bytree': 0.7366966450736462, 'reg_alpha': 0.3827116405132853, 'reg_lambda': 0.7528497123928636}. Best is trial 17 with value: 1.362037454135999.


Early stopping, best iteration is:
[346]	valid_0's l2: 1.36939
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000475 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:30,377] Trial 20 finished with value: 1.3621756913000795 and parameters: {'n_estimators': 500, 'learning_rate': 0.06370235763107444, 'max_depth': 11, 'num_leaves': 98, 'subsample': 0.8564005833311201, 'colsample_bytree': 0.6606684209754823, 'reg_alpha': 0.19953813998147876, 'reg_lambda': 0.163999285278855}. Best is trial 17 with value: 1.362037454135999.


Did not meet early stopping. Best iteration is:
[491]	valid_0's l2: 1.36218
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000509 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:31,281] Trial 21 finished with value: 1.3625653938109739 and parameters: {'n_estimators': 500, 'learning_rate': 0.06008830688258733, 'max_depth': 11, 'num_leaves': 102, 'subsample': 0.8770647075797747, 'colsample_bytree': 0.6579759115060709, 'reg_alpha': 0.16000293171468316, 'reg_lambda': 0.1440591175695712}. Best is trial 17 with value: 1.362037454135999.


Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 1.36257
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000461 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:32,135] Trial 22 finished with value: 1.367542356531433 and parameters: {'n_estimators': 450, 'learning_rate': 0.05696096321190737, 'max_depth': 11, 'num_leaves': 103, 'subsample': 0.8921396634331092, 'colsample_bytree': 0.7669003292843003, 'reg_alpha': 0.17964054961215753, 'reg_lambda': 0.1402782459512969}. Best is trial 17 with value: 1.362037454135999.


Did not meet early stopping. Best iteration is:
[450]	valid_0's l2: 1.36754
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000468 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[400]	valid_0's l2: 1.38846


[I 2025-04-06 17:23:32,865] Trial 23 finished with value: 1.3884565145289454 and parameters: {'n_estimators': 400, 'learning_rate': 0.02697398520856508, 'max_depth': 9, 'num_leaves': 104, 'subsample': 0.8573836360252317, 'colsample_bytree': 0.5846837599256975, 'reg_alpha': 0.18521147140665545, 'reg_lambda': 0.17310860014274765}. Best is trial 17 with value: 1.362037454135999.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000555 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:33,675] Trial 24 finished with value: 1.3840008118398588 and parameters: {'n_estimators': 500, 'learning_rate': 0.03648340819363833, 'max_depth': 8, 'num_leaves': 95, 'subsample': 0.9281165833109751, 'colsample_bytree': 0.6567820553147468, 'reg_alpha': 0.0859302948799578, 'reg_lambda': 0.23584331476604697}. Best is trial 17 with value: 1.362037454135999.


Did not meet early stopping. Best iteration is:
[500]	valid_0's l2: 1.384
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001142 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[545]	valid_0's l2: 1.36182


[I 2025-04-06 17:23:34,796] Trial 25 finished with value: 1.3618234955995945 and parameters: {'n_estimators': 550, 'learning_rate': 0.05738492414336602, 'max_depth': 11, 'num_leaves': 124, 'subsample': 0.8634451506973061, 'colsample_bytree': 0.7014415140814853, 'reg_alpha': 0.21678640762002457, 'reg_lambda': 0.0963815512363418}. Best is trial 25 with value: 1.3618234955995945.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000803 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:35,358] Trial 26 finished with value: 1.3686464381197587 and parameters: {'n_estimators': 700, 'learning_rate': 0.11795830098649762, 'max_depth': 10, 'num_leaves': 121, 'subsample': 0.8396107381401324, 'colsample_bytree': 0.7286279814408031, 'reg_alpha': 0.2583309791634115, 'reg_lambda': 0.06420551931394763}. Best is trial 25 with value: 1.3618234955995945.


Early stopping, best iteration is:
[223]	valid_0's l2: 1.36865
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[350]	valid_0's l2: 1.392


[I 2025-04-06 17:23:36,310] Trial 27 finished with value: 1.391998169504012 and parameters: {'n_estimators': 350, 'learning_rate': 0.02284775329199489, 'max_depth': 11, 'num_leaves': 133, 'subsample': 0.7703376726734436, 'colsample_bytree': 0.7964263372634888, 'reg_alpha': 0.35391153359542993, 'reg_lambda': 0.38600271231678696}. Best is trial 25 with value: 1.3618234955995945.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001123 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[600]	valid_0's l2: 1.36658


[I 2025-04-06 17:23:37,492] Trial 28 finished with value: 1.3665776130879452 and parameters: {'n_estimators': 600, 'learning_rate': 0.040935708017881996, 'max_depth': 9, 'num_leaves': 112, 'subsample': 0.9459807126491256, 'colsample_bytree': 0.705393941164275, 'reg_alpha': 0.4138784100474988, 'reg_lambda': 0.2729185354586766}. Best is trial 25 with value: 1.3618234955995945.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001240 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[715]	valid_0's l2: 1.35544


[I 2025-04-06 17:23:39,143] Trial 29 finished with value: 1.3554432770214284 and parameters: {'n_estimators': 750, 'learning_rate': 0.051721181894654855, 'max_depth': 10, 'num_leaves': 130, 'subsample': 0.807559049500166, 'colsample_bytree': 0.582152936788249, 'reg_alpha': 0.5676250991279925, 'reg_lambda': 0.10562939012462058}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[750]	valid_0's l2: 1.39271


[I 2025-04-06 17:23:40,324] Trial 30 finished with value: 1.392710322608448 and parameters: {'n_estimators': 750, 'learning_rate': 0.021500502133434112, 'max_depth': 7, 'num_leaves': 132, 'subsample': 0.7939013526154869, 'colsample_bytree': 0.5634718059514316, 'reg_alpha': 0.563222987707797, 'reg_lambda': 0.07364229518559373}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000798 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[650]	valid_0's l2: 1.36027


[I 2025-04-06 17:23:41,727] Trial 31 finished with value: 1.3602709225766423 and parameters: {'n_estimators': 650, 'learning_rate': 0.0498091562170599, 'max_depth': 11, 'num_leaves': 127, 'subsample': 0.7338343472332616, 'colsample_bytree': 0.5928403534077527, 'reg_alpha': 0.5534827213863555, 'reg_lambda': 0.012551309173775269}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000847 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[719]	valid_0's l2: 1.35838


[I 2025-04-06 17:23:43,343] Trial 32 finished with value: 1.3583768548848192 and parameters: {'n_estimators': 750, 'learning_rate': 0.04693714845162436, 'max_depth': 10, 'num_leaves': 126, 'subsample': 0.7321004220267495, 'colsample_bytree': 0.5895407161036854, 'reg_alpha': 0.5822981872492683, 'reg_lambda': 0.010006468271290255}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000456 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[547]	valid_0's l2: 1.36348


[I 2025-04-06 17:23:44,051] Trial 33 finished with value: 1.3634754321894669 and parameters: {'n_estimators': 750, 'learning_rate': 0.045706393922264874, 'max_depth': 10, 'num_leaves': 124, 'subsample': 0.7409237720496283, 'colsample_bytree': 0.5830224138403239, 'reg_alpha': 0.6073266820343175, 'reg_lambda': 0.0018690570847487518}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000714 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[599]	valid_0's l2: 1.36492


[I 2025-04-06 17:23:45,480] Trial 34 finished with value: 1.364919359834554 and parameters: {'n_estimators': 600, 'learning_rate': 0.05314193166760738, 'max_depth': 10, 'num_leaves': 144, 'subsample': 0.7376607940962043, 'colsample_bytree': 0.6008289218762634, 'reg_alpha': 0.5221690843079098, 'reg_lambda': 0.10285573218536005}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000561 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[650]	valid_0's l2: 1.36685


[I 2025-04-06 17:23:46,855] Trial 35 finished with value: 1.366851201361391 and parameters: {'n_estimators': 650, 'learning_rate': 0.04805044189700865, 'max_depth': 8, 'num_leaves': 128, 'subsample': 0.6657641431760738, 'colsample_bytree': 0.5502369739691353, 'reg_alpha': 0.6191679798422374, 'reg_lambda': 0.0355882028640912}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000831 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[558]	valid_0's l2: 1.36743


[I 2025-04-06 17:23:48,159] Trial 36 finished with value: 1.3674276900213485 and parameters: {'n_estimators': 700, 'learning_rate': 0.09147049106124941, 'max_depth': 9, 'num_leaves': 139, 'subsample': 0.7791617056221178, 'colsample_bytree': 0.6029096766227069, 'reg_alpha': 0.4738827879287461, 'reg_lambda': 0.1072342262595068}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000988 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[800]	valid_0's l2: 1.39313


[I 2025-04-06 17:23:49,104] Trial 37 finished with value: 1.3931304205786408 and parameters: {'n_estimators': 800, 'learning_rate': 0.03115998985965609, 'max_depth': 6, 'num_leaves': 138, 'subsample': 0.8128735426984974, 'colsample_bytree': 0.618846344608078, 'reg_alpha': 0.6515917123426184, 'reg_lambda': 0.22831688530926147}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[427]	valid_0's l2: 1.36031


[I 2025-04-06 17:23:50,392] Trial 38 finished with value: 1.3603055145439775 and parameters: {'n_estimators': 650, 'learning_rate': 0.06637379857404134, 'max_depth': 10, 'num_leaves': 146, 'subsample': 0.6750774615844719, 'colsample_bytree': 0.5447265026043792, 'reg_alpha': 0.8097732139696661, 'reg_lambda': 0.023987286199581764}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000816 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[139]	valid_0's l2: 1.37332


[I 2025-04-06 17:23:50,864] Trial 39 finished with value: 1.373319754261206 and parameters: {'n_estimators': 650, 'learning_rate': 0.1518172944701454, 'max_depth': 10, 'num_leaves': 150, 'subsample': 0.6132337275364863, 'colsample_bytree': 0.5385330649309913, 'reg_alpha': 0.7875080445134589, 'reg_lambda': 0.0289150839689766}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000656 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[791]	valid_0's l2: 1.36646


[I 2025-04-06 17:23:52,032] Trial 40 finished with value: 1.3664616123022635 and parameters: {'n_estimators': 800, 'learning_rate': 0.06884599663109361, 'max_depth': 7, 'num_leaves': 112, 'subsample': 0.6821934324904295, 'colsample_bytree': 0.5774813738264561, 'reg_alpha': 0.8867508559756749, 'reg_lambda': 0.009079038923541016}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003007 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[549]	valid_0's l2: 1.36042


[I 2025-04-06 17:23:53,223] Trial 41 finished with value: 1.3604207878744516 and parameters: {'n_estimators': 550, 'learning_rate': 0.051988165409294476, 'max_depth': 10, 'num_leaves': 121, 'subsample': 0.7383684392059572, 'colsample_bytree': 0.5333422955741064, 'reg_alpha': 0.8125639952792012, 'reg_lambda': 0.10099033129386296}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[303]	valid_0's l2: 1.35944


[I 2025-04-06 17:23:54,161] Trial 42 finished with value: 1.3594423311256154 and parameters: {'n_estimators': 650, 'learning_rate': 0.09481210751601779, 'max_depth': 10, 'num_leaves': 144, 'subsample': 0.7408893866641267, 'colsample_bytree': 0.5296486931818373, 'reg_alpha': 0.8213538031606057, 'reg_lambda': 0.2075983798045733}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000878 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[290]	valid_0's l2: 1.3724


[I 2025-04-06 17:23:55,044] Trial 43 finished with value: 1.3723986680950102 and parameters: {'n_estimators': 750, 'learning_rate': 0.09559220129501747, 'max_depth': 9, 'num_leaves': 145, 'subsample': 0.7209223407220974, 'colsample_bytree': 0.6317682347440463, 'reg_alpha': 0.9237326523831104, 'reg_lambda': 0.19315261967127761}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000740 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:55,860] Trial 44 finished with value: 1.363945521463939 and parameters: {'n_estimators': 650, 'learning_rate': 0.09373474173503903, 'max_depth': 10, 'num_leaves': 137, 'subsample': 0.7555794187274752, 'colsample_bytree': 0.5631674467150108, 'reg_alpha': 0.8437984788084398, 'reg_lambda': 0.05618021378914079}. Best is trial 29 with value: 1.3554432770214284.


Early stopping, best iteration is:
[308]	valid_0's l2: 1.36395
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[134]	valid_0's l2: 1.38596


[I 2025-04-06 17:23:56,302] Trial 45 finished with value: 1.3859605175878416 and parameters: {'n_estimators': 700, 'learning_rate': 0.2155403262775324, 'max_depth': 8, 'num_leaves': 149, 'subsample': 0.6836000964884401, 'colsample_bytree': 0.5059521056605821, 'reg_alpha': 0.6830726826922223, 'reg_lambda': 0.2366379230872547}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000887 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[420]	valid_0's l2: 1.36535


[I 2025-04-06 17:23:57,251] Trial 46 finished with value: 1.3653502666063033 and parameters: {'n_estimators': 600, 'learning_rate': 0.06964610569768537, 'max_depth': 12, 'num_leaves': 117, 'subsample': 0.5862534487693072, 'colsample_bytree': 0.6140914111871458, 'reg_alpha': 0.9931895697242169, 'reg_lambda': 0.001539882458163935}. Best is trial 29 with value: 1.3554432770214284.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000765 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:57,844] Trial 47 finished with value: 1.379565251304822 and parameters: {'n_estimators': 800, 'learning_rate': 0.1479069937344848, 'max_depth': 9, 'num_leaves': 143, 'subsample': 0.7145894902408552, 'colsample_bytree': 0.5928656275865813, 'reg_alpha': 0.5970215009739831, 'reg_lambda': 0.32318747369914486}. Best is trial 29 with value: 1.3554432770214284.


Early stopping, best iteration is:
[197]	valid_0's l2: 1.37957
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:58,857] Trial 48 finished with value: 1.3577088742922403 and parameters: {'n_estimators': 650, 'learning_rate': 0.07839686884597474, 'max_depth': 11, 'num_leaves': 85, 'subsample': 0.7617699618560149, 'colsample_bytree': 0.9770785820551854, 'reg_alpha': 0.7386478804034725, 'reg_lambda': 0.1368181897006921}. Best is trial 29 with value: 1.3554432770214284.


Did not meet early stopping. Best iteration is:
[636]	valid_0's l2: 1.35771
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000803 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds


[I 2025-04-06 17:23:59,611] Trial 49 finished with value: 1.3646966666311782 and parameters: {'n_estimators': 750, 'learning_rate': 0.08316390907745748, 'max_depth': 12, 'num_leaves': 86, 'subsample': 0.7609691893863579, 'colsample_bytree': 0.9524776001002138, 'reg_alpha': 0.7406490399426696, 'reg_lambda': 0.13396182060809905}. Best is trial 29 with value: 1.3554432770214284.


Early stopping, best iteration is:
[393]	valid_0's l2: 1.3647
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 52735, number of used features: 6
[LightGBM] [Info] Start training from score 2.088721
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[715]	valid_0's l2: 1.35544
Optimized LGBM model and scaler saved.
Optimized Mean Squared Error: 1.3554432770214284
Optimized Root Mean Squared Error: 1.1642350608968226
Optimized R-squared Score: 0.38196206210643346
Actual: 4.2940, Predicted: 3.0740
Actual: 2.3458, Predicted: 2.3188
Actual: 1.7798, Predicted: 1.6533
Actual: 0.6163, Predicted: 1.3474
Actual: 3.9000, Predicted: 3.7456
Actual: 3.2552, Predicted: 2.3776
Act



In [4]:
# random forest
import pandas as pd
import numpy as np
import joblib
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_path = "dataset_true_score.xlsx"
df = pd.read_excel(file_path)

features = ["cost", "pop_density", "traffic_rte", "visibility", "avg_price_level", "Comp_Score"]
target = "true_score"

# Clean data
df[target] = pd.to_numeric(df[target], errors='coerce')
df = df.dropna()

X = df[features]
y = df[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Optuna optimization
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000, step=100),
        "max_depth": trial.suggest_int("max_depth", 5, 30),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 4),
        "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2", None]),
    }

    model = RandomForestRegressor(**params, random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return -r2_score(y_test, y_pred)  # Negative because Optuna minimizes

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Train best model
best_params = study.best_params
rf = RandomForestRegressor(**best_params, random_state=42)
rf.fit(X_train_scaled, y_train)

# Save model
joblib.dump(rf, "rf_model_optuna.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Optimized RF model and scaler saved.")

# Predictions
y_pred = rf.predict(X_test_scaled)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Optimized Mean Squared Error:", mse)
print("Optimized Root Mean Squared Error:", rmse)
print("Optimized R-squared Score:", r2)

# Show predictions
for actual, predicted in zip(y_test[:10], y_pred[:10]):
    print(f"Actual: {actual:.4f}, Predicted: {predicted:.4f}")


[I 2025-04-04 23:24:44,225] A new study created in memory with name: no-name-2d0d30a1-bdf2-4248-9d47-3c4e6093c20c
[I 2025-04-04 23:24:53,646] Trial 0 finished with value: -0.07939828098378154 and parameters: {'n_estimators': 400, 'max_depth': 15, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 0 with value: -0.07939828098378154.
[I 2025-04-04 23:25:14,077] Trial 1 finished with value: -0.07800935783260199 and parameters: {'n_estimators': 1000, 'max_depth': 13, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 0 with value: -0.07939828098378154.
[I 2025-04-04 23:25:36,216] Trial 2 finished with value: -0.08387594415131383 and parameters: {'n_estimators': 800, 'max_depth': 22, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 2 with value: -0.08387594415131383.
[I 2025-04-04 23:25:51,028] Trial 3 finished with value: -0.08796235261066776 and parameters: {'n_estimators': 500, 'max

Optimized RF model and scaler saved.
Optimized Mean Squared Error: 1.9684610008679293
Optimized Root Mean Squared Error: 1.4030185319046677
Optimized R-squared Score: 0.09086524472440638
Actual: 4.4782, Predicted: 2.2540
Actual: 3.9231, Predicted: 2.2994
Actual: 0.5914, Predicted: 3.0317
Actual: 0.9616, Predicted: 2.1452
Actual: 4.2000, Predicted: 3.2190
Actual: 4.1915, Predicted: 3.5262
Actual: 1.3910, Predicted: 3.3515
Actual: 3.4682, Predicted: 2.5866
Actual: 3.8512, Predicted: 3.0822
Actual: 3.4591, Predicted: 3.1524
