In [1]:
import os
import math
import time
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import HistGradientBoostingRegressor

In [2]:
try:
    import lightgbm as lgb
    LGB_AVAILABLE = True
except Exception:
    LGB_AVAILABLE = False

In [3]:
train_data = pd.read_csv('/kaggle/input/playground-series-s5e9/train.csv')
test_data = pd.read_csv('/kaggle/input/playground-series-s5e9/test.csv')
sample = pd.read_csv('/kaggle/input/playground-series-s5e9/sample_submission.csv')

In [4]:
train_data.head()

Unnamed: 0,id,RhythmScore,AudioLoudness,VocalContent,AcousticQuality,InstrumentalScore,LivePerformanceLikelihood,MoodScore,TrackDurationMs,Energy,BeatsPerMinute
0,0,0.60361,-7.636942,0.0235,5e-06,1e-06,0.051385,0.409866,290715.645,0.826267,147.5302
1,1,0.639451,-16.267598,0.07152,0.444929,0.349414,0.170522,0.65101,164519.5174,0.1454,136.15963
2,2,0.514538,-15.953575,0.110715,0.173699,0.453814,0.029576,0.423865,174495.5667,0.624667,55.31989
3,3,0.734463,-1.357,0.052965,0.001651,0.159717,0.086366,0.278745,225567.4651,0.487467,147.91212
4,4,0.532968,-13.056437,0.0235,0.068687,1e-06,0.331345,0.477769,213960.6789,0.947333,89.58511


In [5]:
test_data.head()

Unnamed: 0,id,RhythmScore,AudioLoudness,VocalContent,AcousticQuality,InstrumentalScore,LivePerformanceLikelihood,MoodScore,TrackDurationMs,Energy
0,524164,0.410013,-16.794967,0.0235,0.23291,0.012689,0.271585,0.664321,302901.5498,0.424867
1,524165,0.463071,-1.357,0.141818,0.057725,0.257942,0.097624,0.829552,221995.6643,0.846
2,524166,0.686569,-3.368928,0.167851,0.287823,0.210915,0.325909,0.304978,357724.0127,0.134067
3,524167,0.885793,-5.598049,0.118488,5e-06,0.376906,0.134435,0.48774,271790.3989,0.316467
4,524168,0.637391,-7.06816,0.126099,0.539073,0.06895,0.0243,0.591248,277728.5383,0.481067


In [6]:
sample.head()

Unnamed: 0,id,BeatsPerMinute
0,524164,119.035
1,524165,119.035
2,524166,119.035
3,524167,119.035
4,524168,119.035


In [7]:
TRAIN_CSV = "train.csv"
TEST_CSV  = "test.csv"
TARGET = "BeatsPerMinute"
IDCOL = "id"

In [8]:
DEFAULT_PARAMS = {
    "learning_rate": 0.05,
    "n_estimators": 1000, 
    "num_leaves": 63,
    "min_child_samples": 50,
    "reg_lambda": 1.0,
    "reg_alpha": 0.0,
    "early_stopping_rounds": 50,
    "verbose": -1,
    "random_state": 42
}
SKLEARN_PARAMS = {
    "learning_rate": 0.05,
    "max_iter": 400,
    "max_leaf_nodes": 63,
    "min_samples_leaf": 50,
    "early_stopping": True,
    "validation_fraction": 0.1,
    "random_state": 42
}

In [9]:
TUNE_SUBSAMPLE = 100_000
def add_engineered_features(df):
    """Add a handful of robust features derived from the dataset's numeric columns."""
    df = df.copy()
    # duration in seconds
    if "TrackDurationMs" in df.columns:
        df["Duration_s"] = df["TrackDurationMs"] / 1000.0
        df["log_Duration_s"] = np.log1p(df["Duration_s"])
    # interactions that make musical sense
    if "RhythmScore" in df.columns and "Energy" in df.columns:
        df["Rhythm_x_Energy"] = df["RhythmScore"] * df["Energy"]
    if "InstrumentalScore" in df.columns and "AcousticQuality" in df.columns:
        df["Instrumental_over_Acoustic"] = (df["InstrumentalScore"] + 1e-6) / (df["AcousticQuality"] + 1e-6)
    if "VocalContent" in df.columns and "InstrumentalScore" in df.columns:
        df["Vocal_to_Instrumental"] = (df["VocalContent"] + 1e-6) / (df["InstrumentalScore"] + 1e-6)
    if "MoodScore" in df.columns and "RhythmScore" in df.columns:
        df["Mood_x_Rhythm"] = df["MoodScore"] * df["RhythmScore"]
    return df

def rmse(y_true, y_pred):
    return math.sqrt(mean_squared_error(y_true, y_pred))

In [10]:
X = train_data.drop(columns=[IDCOL, TARGET])
y = train_data[TARGET].values
X_test = test_data.drop(columns=[IDCOL])

X = add_engineered_features(X)
X_test = add_engineered_features(X_test)


for c in X.columns:
    if c not in X_test.columns:
        X_test[c] = 0
X_test = X_test[X.columns] 

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [12]:
# Complete LightGBM Model for Google Colab
# Just run this cell after loading your data (X_train, y_train, X_val, y_val)

import lightgbm as lgb
import optuna
import numpy as np
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

def objective(trial):
    params = {
        "objective": "regression",
        "metric": "rmse",
        "boosting_type": "gbdt",
        "device": "gpu",  # Changed to CPU for Colab compatibility
        "verbosity": -1,
        "seed": 42,
        
        # Optimized parameter ranges
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 31, 300),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
    }
    
    dtrain = lgb.Dataset(X_train, label=y_train)
    dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)
    
    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dval],
        num_boost_round=1000,
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(period=0)  # Silent
        ]
    )
    
    preds = model.predict(X_val, num_iteration=model.best_iteration)
    rmse = mean_squared_error(y_val, preds, squared=False)
    return rmse

# Run optimization
print("Starting hyperparameter optimization...")
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42)
)

study.optimize(objective, n_trials=50)

print(f"Best RMSE: {study.best_value:.6f}")
print("Best parameters found:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

# Get final parameters
best_params = study.best_params.copy()
best_params.update({
    "objective": "regression",
    "metric": "rmse", 
    "boosting_type": "gbdt",
    "device": "cpu",
    "verbosity": -1,
    "seed": 42
})

# Train final model
print("\nTraining final model with best parameters...")
dtrain = lgb.Dataset(X_train, label=y_train)
dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

final_model = lgb.train(
    best_params,
    dtrain,
    valid_sets=[dtrain, dval],
    valid_names=['train', 'val'],
    num_boost_round=1000,
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=50)
    ]
)



# Feature importance (optional - uncomment if needed)
# import matplotlib.pyplot as plt
# lgb.plot_importance(final_model, max_num_features=20, figsize=(10, 8))
# plt.tight_layout()
# plt.show()

print("\nModel training complete! Use 'final_model' for predictions.")
print("Example: predictions = final_model.predict(X_test)")

[I 2025-09-02 19:51:15,427] A new study created in memory with name: no-name-2a942936-d568-4132-9ce0-1dc3cf049a41


Starting hyperparameter optimization...




Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:51:24,231] Trial 0 finished with value: 26.444369200535657 and parameters: {'learning_rate': 0.03574712922600244, 'num_leaves': 287, 'max_depth': 10, 'feature_fraction': 0.759195090518222, 'bagging_fraction': 0.4936111842654619, 'bagging_freq': 2, 'min_child_samples': 10, 'lambda_l1': 0.6245760287469893, 'lambda_l2': 0.002570603566117598, 'min_split_gain': 0.7080725777960455, 'reg_alpha': 1.5320059381854043e-08, 'reg_lambda': 5.360294728728285}. Best is trial 0 with value: 26.444369200535657.


Early stopping, best iteration is:
[16]	valid_0's rmse: 26.4444
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:51:25,692] Trial 1 finished with value: 26.441030317688075 and parameters: {'learning_rate': 0.16967533607196555, 'num_leaves': 88, 'max_depth': 4, 'feature_fraction': 0.5100427059120604, 'bagging_fraction': 0.5825453457757226, 'bagging_freq': 4, 'min_child_samples': 46, 'lambda_l1': 4.17890272377219e-06, 'lambda_l2': 0.0032112643094417484, 'min_split_gain': 0.13949386065204183, 'reg_alpha': 4.258943089524393e-06, 'reg_lambda': 1.9826980964985924e-05}. Best is trial 1 with value: 26.441030317688075.


Early stopping, best iteration is:
[8]	valid_0's rmse: 26.441
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[79]	valid_0's rmse: 26.4391


[I 2025-09-02 19:51:27,826] Trial 2 finished with value: 26.439095992651232 and parameters: {'learning_rate': 0.04717052037625178, 'num_leaves': 242, 'max_depth': 4, 'feature_fraction': 0.708540663048167, 'bagging_fraction': 0.7554487413172255, 'bagging_freq': 1, 'min_child_samples': 63, 'lambda_l1': 3.425445902633376e-07, 'lambda_l2': 3.850031979199519e-08, 'min_split_gain': 0.9488855372533332, 'reg_alpha': 4.905556676028774, 'reg_lambda': 0.18861495878553936}. Best is trial 2 with value: 26.439095992651232.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[74]	valid_0's rmse: 26.4387


[I 2025-09-02 19:51:31,223] Trial 3 finished with value: 26.438707131989894 and parameters: {'learning_rate': 0.028180680291847244, 'num_leaves': 57, 'max_depth': 9, 'feature_fraction': 0.6640914962437607, 'bagging_fraction': 0.47322294090686734, 'bagging_freq': 4, 'min_child_samples': 8, 'lambda_l1': 1.527156759251193, 'lambda_l2': 2.133142332373004e-06, 'min_split_gain': 0.662522284353982, 'reg_alpha': 6.388511557344611e-06, 'reg_lambda': 0.0004793052550782129}. Best is trial 3 with value: 26.438707131989894.


Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:51:33,583] Trial 4 finished with value: 26.44024113558932 and parameters: {'learning_rate': 0.06420330336297862, 'num_leaves': 80, 'max_depth': 12, 'feature_fraction': 0.8650796940166687, 'bagging_fraction': 0.9636993649385135, 'bagging_freq': 7, 'min_child_samples': 62, 'lambda_l1': 1.9809253750493907, 'lambda_l2': 6.257956190096665e-08, 'min_split_gain': 0.1959828624191452, 'reg_alpha': 2.5529693461039728e-08, 'reg_lambda': 8.471746987003668e-06}. Best is trial 3 with value: 26.438707131989894.


Early stopping, best iteration is:
[22]	valid_0's rmse: 26.4402
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:51:36,731] Trial 5 finished with value: 26.441325128393938 and parameters: {'learning_rate': 0.03750796359625606, 'num_leaves': 104, 'max_depth': 11, 'feature_fraction': 0.6140519960161536, 'bagging_fraction': 0.5685607058124285, 'bagging_freq': 4, 'min_child_samples': 18, 'lambda_l1': 0.16587190283399655, 'lambda_l2': 4.6876566400928895e-08, 'min_split_gain': 0.9868869366005173, 'reg_alpha': 0.08916674715636537, 'reg_lambda': 6.143857495033091e-07}. Best is trial 3 with value: 26.438707131989894.


Early stopping, best iteration is:
[40]	valid_0's rmse: 26.4413
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[83]	valid_0's rmse: 26.4393


[I 2025-09-02 19:51:42,969] Trial 6 finished with value: 26.439288937515077 and parameters: {'learning_rate': 0.010189592979395137, 'num_leaves': 251, 'max_depth': 10, 'feature_fraction': 0.8374043008245924, 'bagging_fraction': 0.8627622080115674, 'bagging_freq': 1, 'min_child_samples': 39, 'lambda_l1': 1.1036250149900698e-07, 'lambda_l2': 0.5860448217200517, 'min_split_gain': 0.6232981268275579, 'reg_alpha': 9.507847858536042e-06, 'reg_lambda': 3.732717755563729e-08}. Best is trial 3 with value: 26.438707131989894.


Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:51:46,350] Trial 7 finished with value: 26.43834016106985 and parameters: {'learning_rate': 0.028797752657070342, 'num_leaves': 118, 'max_depth': 10, 'feature_fraction': 0.7825344828131279, 'bagging_fraction': 0.932327645545796, 'bagging_freq': 4, 'min_child_samples': 16, 'lambda_l1': 0.02625445968759339, 'lambda_l2': 0.0703178263660987, 'min_split_gain': 0.5612771975694962, 'reg_alpha': 0.08683696167603723, 'reg_lambda': 0.0002780739892288472}. Best is trial 7 with value: 26.43834016106985.


Early stopping, best iteration is:
[32]	valid_0's rmse: 26.4383
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[93]	valid_0's rmse: 26.4386


[I 2025-09-02 19:51:48,590] Trial 8 finished with value: 26.43863693301852 and parameters: {'learning_rate': 0.05917520523090666, 'num_leaves': 146, 'max_depth': 3, 'feature_fraction': 0.4647348561959827, 'bagging_fraction': 0.4188575114120406, 'bagging_freq': 5, 'min_child_samples': 35, 'lambda_l1': 0.0003776906385115029, 'lambda_l2': 1.4726456718740824, 'min_split_gain': 0.24929222914887494, 'reg_alpha': 4.9368087974032924e-05, 'reg_lambda': 0.06308995924905789}. Best is trial 7 with value: 26.43834016106985.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[120]	valid_0's rmse: 26.4396


[I 2025-09-02 19:51:52,943] Trial 9 finished with value: 26.439580734228354 and parameters: {'learning_rate': 0.021775224101934068, 'num_leaves': 51, 'max_depth': 5, 'feature_fraction': 0.49673277235240265, 'bagging_fraction': 0.9578185914055438, 'bagging_freq': 6, 'min_child_samples': 65, 'lambda_l1': 0.6968596391373533, 'lambda_l2': 0.1710207048797339, 'min_split_gain': 0.18657005888603584, 'reg_alpha': 1.0790237065789294, 'reg_lambda': 0.000714628244934021}. Best is trial 7 with value: 26.43834016106985.


Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:51:55,270] Trial 10 finished with value: 26.441616684835097 and parameters: {'learning_rate': 0.14062854342115, 'num_leaves': 181, 'max_depth': 7, 'feature_fraction': 0.9593742051470724, 'bagging_fraction': 0.7840531014805339, 'bagging_freq': 3, 'min_child_samples': 97, 'lambda_l1': 0.0030547790322864848, 'lambda_l2': 2.01418720453791e-05, 'min_split_gain': 0.37960424342022336, 'reg_alpha': 0.012714058277038117, 'reg_lambda': 0.01881061891210943}. Best is trial 7 with value: 26.43834016106985.


Early stopping, best iteration is:
[5]	valid_0's rmse: 26.4416
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:51:57,568] Trial 11 finished with value: 26.442822459411346 and parameters: {'learning_rate': 0.07974740770146303, 'num_leaves': 149, 'max_depth': 7, 'feature_fraction': 0.4121784486479587, 'bagging_fraction': 0.656267383379999, 'bagging_freq': 5, 'min_child_samples': 30, 'lambda_l1': 0.0006895531113505957, 'lambda_l2': 4.545308413940443, 'min_split_gain': 0.37691477171409976, 'reg_alpha': 0.0005833455990110428, 'reg_lambda': 0.019443434939367503}. Best is trial 7 with value: 26.43834016106985.


Early stopping, best iteration is:
[9]	valid_0's rmse: 26.4428
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[101]	valid_0's rmse: 26.44


[I 2025-09-02 19:52:04,042] Trial 12 finished with value: 26.43997544341194 and parameters: {'learning_rate': 0.01598716810745302, 'num_leaves': 148, 'max_depth': 8, 'feature_fraction': 0.6056795729597898, 'bagging_fraction': 0.81588578621961, 'bagging_freq': 6, 'min_child_samples': 28, 'lambda_l1': 3.078760428803112e-05, 'lambda_l2': 0.04613222562051556, 'min_split_gain': 0.4368038404265787, 'reg_alpha': 0.0018190904900702786, 'reg_lambda': 9.44455859063907}. Best is trial 7 with value: 26.43834016106985.


Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:05,522] Trial 13 finished with value: 26.438300431756463 and parameters: {'learning_rate': 0.11049895148955624, 'num_leaves': 190, 'max_depth': 3, 'feature_fraction': 0.8019938737296212, 'bagging_fraction': 0.4316983285204165, 'bagging_freq': 5, 'min_child_samples': 26, 'lambda_l1': 0.01531838616039128, 'lambda_l2': 2.2361071609276455, 'min_split_gain': 0.03402234078025376, 'reg_alpha': 8.38867239943569e-05, 'reg_lambda': 0.016763296049319098}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[27]	valid_0's rmse: 26.4383
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:07,452] Trial 14 finished with value: 26.44154765634626 and parameters: {'learning_rate': 0.2426004019564968, 'num_leaves': 200, 'max_depth': 6, 'feature_fraction': 0.8152656479985093, 'bagging_fraction': 0.6772704500868822, 'bagging_freq': 3, 'min_child_samples': 21, 'lambda_l1': 0.00945567340077158, 'lambda_l2': 0.015473915031751793, 'min_split_gain': 0.004682189677379663, 'reg_alpha': 0.10566317039122851, 'reg_lambda': 0.0015579107174789602}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[2]	valid_0's rmse: 26.4415
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:10,553] Trial 15 finished with value: 26.44272317620122 and parameters: {'learning_rate': 0.09347237905208677, 'num_leaves': 218, 'max_depth': 9, 'feature_fraction': 0.9600611999938756, 'bagging_fraction': 0.9017544811285909, 'bagging_freq': 5, 'min_child_samples': 86, 'lambda_l1': 0.03896376942414267, 'lambda_l2': 9.768743953048428, 'min_split_gain': 0.8139937055778317, 'reg_alpha': 0.0071823081017600805, 'reg_lambda': 3.801185697926984e-05}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[5]	valid_0's rmse: 26.4427
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:12,344] Trial 16 finished with value: 26.438814472642967 and parameters: {'learning_rate': 0.11971677241936413, 'num_leaves': 120, 'max_depth': 3, 'feature_fraction': 0.7648731806091493, 'bagging_fraction': 0.6150430116857828, 'bagging_freq': 3, 'min_child_samples': 5, 'lambda_l1': 2.5744995167527635e-05, 'lambda_l2': 0.00030872817327102625, 'min_split_gain': 0.559761699970516, 'reg_alpha': 0.00017435902010390957, 'reg_lambda': 0.8232034142326641}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[32]	valid_0's rmse: 26.4388
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:14,740] Trial 17 finished with value: 26.4466593476425 and parameters: {'learning_rate': 0.29123033899917067, 'num_leaves': 187, 'max_depth': 12, 'feature_fraction': 0.8961803940515138, 'bagging_fraction': 0.7526573291114901, 'bagging_freq': 7, 'min_child_samples': 49, 'lambda_l1': 7.993901253143485, 'lambda_l2': 0.14657677918011514, 'min_split_gain': 0.0038600759352363477, 'reg_alpha': 2.0980070141798737e-07, 'reg_lambda': 0.0035693055549452925}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[1]	valid_0's rmse: 26.4467
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[113]	valid_0's rmse: 26.4395


[I 2025-09-02 19:52:19,244] Trial 18 finished with value: 26.439519482327942 and parameters: {'learning_rate': 0.01823330592059452, 'num_leaves': 125, 'max_depth': 6, 'feature_fraction': 0.7563885955532469, 'bagging_fraction': 0.5172168690588572, 'bagging_freq': 6, 'min_child_samples': 20, 'lambda_l1': 0.020071993110896212, 'lambda_l2': 0.000194566564516302, 'min_split_gain': 0.5047142760952016, 'reg_alpha': 0.20876177141601565, 'reg_lambda': 6.943186017072804e-05}. Best is trial 13 with value: 26.438300431756463.


Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:21,836] Trial 19 finished with value: 26.44163717863324 and parameters: {'learning_rate': 0.18792686459303665, 'num_leaves': 292, 'max_depth': 8, 'feature_fraction': 0.9075446155145139, 'bagging_fraction': 0.9878461902155602, 'bagging_freq': 5, 'min_child_samples': 41, 'lambda_l1': 0.0017230218572485776, 'lambda_l2': 0.008351239053195939, 'min_split_gain': 0.7962045724676272, 'reg_alpha': 7.240213945149135e-07, 'reg_lambda': 2.4379620486650113e-06}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[3]	valid_0's rmse: 26.4416
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:24,785] Trial 20 finished with value: 26.44463709371389 and parameters: {'learning_rate': 0.09795783960806169, 'num_leaves': 227, 'max_depth': 10, 'feature_fraction': 0.6837462325229557, 'bagging_fraction': 0.40608257690742966, 'bagging_freq': 2, 'min_child_samples': 76, 'lambda_l1': 0.10209986397265433, 'lambda_l2': 0.6434523233771845, 'min_split_gain': 0.30842950447467743, 'reg_alpha': 0.010407569640040535, 'reg_lambda': 0.006031267765524654}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[2]	valid_0's rmse: 26.4446
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:27,005] Trial 21 finished with value: 26.438634626406824 and parameters: {'learning_rate': 0.05666682882782473, 'num_leaves': 149, 'max_depth': 3, 'feature_fraction': 0.4090280480193863, 'bagging_fraction': 0.402103893221156, 'bagging_freq': 5, 'min_child_samples': 29, 'lambda_l1': 9.166337029970029e-05, 'lambda_l2': 2.4060993101802457, 'min_split_gain': 0.10262410086082371, 'reg_alpha': 5.9708266906149754e-05, 'reg_lambda': 0.13399762395483283}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[95]	valid_0's rmse: 26.4386
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:29,628] Trial 22 finished with value: 26.438644706705492 and parameters: {'learning_rate': 0.04839209079913342, 'num_leaves': 161, 'max_depth': 4, 'feature_fraction': 0.5762705096821177, 'bagging_fraction': 0.4579179346103902, 'bagging_freq': 5, 'min_child_samples': 27, 'lambda_l1': 7.583035847949047e-05, 'lambda_l2': 2.769628046060098, 'min_split_gain': 0.094902523096968, 'reg_alpha': 0.00014297368512434838, 'reg_lambda': 0.6038837223977068}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[76]	valid_0's rmse: 26.4386
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:31,526] Trial 23 finished with value: 26.440879502307375 and parameters: {'learning_rate': 0.07210275826202414, 'num_leaves': 124, 'max_depth': 5, 'feature_fraction': 0.8000326387989742, 'bagging_fraction': 0.5331519308509715, 'bagging_freq': 6, 'min_child_samples': 17, 'lambda_l1': 3.182242425839708e-06, 'lambda_l2': 0.12389885072253934, 'min_split_gain': 0.08389881261973153, 'reg_alpha': 4.007597954057248e-05, 'reg_lambda': 0.00015844325425370826}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[20]	valid_0's rmse: 26.4409
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:33,838] Trial 24 finished with value: 26.438880171173476 and parameters: {'learning_rate': 0.03005215975547569, 'num_leaves': 173, 'max_depth': 3, 'feature_fraction': 0.7276658734218235, 'bagging_fraction': 0.4477153403537094, 'bagging_freq': 4, 'min_child_samples': 14, 'lambda_l1': 0.00684599133666774, 'lambda_l2': 0.9358627566980817, 'min_split_gain': 0.27817275834247857, 'reg_alpha': 0.0012966611276244609, 'reg_lambda': 0.07792561963608279}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[87]	valid_0's rmse: 26.4389
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:35,907] Trial 25 finished with value: 26.439860168259514 and parameters: {'learning_rate': 0.042673005945218896, 'num_leaves': 204, 'max_depth': 5, 'feature_fraction': 0.6485083080867656, 'bagging_fraction': 0.40194548858880397, 'bagging_freq': 4, 'min_child_samples': 32, 'lambda_l1': 0.00012599658512344564, 'lambda_l2': 0.03551971961306068, 'min_split_gain': 0.5266603249395978, 'reg_alpha': 8.147905212610102e-07, 'reg_lambda': 0.8674838365355698}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[27]	valid_0's rmse: 26.4399
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:39,347] Trial 26 finished with value: 26.439587850402592 and parameters: {'learning_rate': 0.023400736956391815, 'num_leaves': 77, 'max_depth': 6, 'feature_fraction': 0.9990980125296525, 'bagging_fraction': 0.7066226372270353, 'bagging_freq': 5, 'min_child_samples': 23, 'lambda_l1': 0.0010264501972384484, 'lambda_l2': 0.29686008799729946, 'min_split_gain': 0.08185345401915753, 'reg_alpha': 4.422400812033632e-05, 'reg_lambda': 0.017578945045595866}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[57]	valid_0's rmse: 26.4396
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:41,399] Trial 27 finished with value: 26.43912381087215 and parameters: {'learning_rate': 0.11665857500182011, 'num_leaves': 32, 'max_depth': 4, 'feature_fraction': 0.566128154994262, 'bagging_fraction': 0.8530795940541166, 'bagging_freq': 3, 'min_child_samples': 53, 'lambda_l1': 0.12265494738583425, 'lambda_l2': 5.839795398889755, 'min_split_gain': 0.4350049327341754, 'reg_alpha': 0.003822378164874147, 'reg_lambda': 0.004558953706669441}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[25]	valid_0's rmse: 26.4391
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:43,981] Trial 28 finished with value: 26.441493385666675 and parameters: {'learning_rate': 0.05709231899536706, 'num_leaves': 136, 'max_depth': 11, 'feature_fraction': 0.7885972213334904, 'bagging_fraction': 0.5474680094283428, 'bagging_freq': 6, 'min_child_samples': 41, 'lambda_l1': 1.8498286528414965e-06, 'lambda_l2': 0.002917408555929077, 'min_split_gain': 0.59547980566086, 'reg_alpha': 0.028498685566976763, 'reg_lambda': 0.12213440148354518}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[9]	valid_0's rmse: 26.4415
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:52:47,865] Trial 29 finished with value: 26.444230359619464 and parameters: {'learning_rate': 0.033246232743912724, 'num_leaves': 271, 'max_depth': 9, 'feature_fraction': 0.7400623474371264, 'bagging_fraction': 0.49846103611260517, 'bagging_freq': 2, 'min_child_samples': 10, 'lambda_l1': 0.03788188706474087, 'lambda_l2': 0.0007411031711149705, 'min_split_gain': 0.7345536240320852, 'reg_alpha': 8.009641335418774, 'reg_lambda': 2.2714817432778442}. Best is trial 13 with value: 26.438300431756463.


Early stopping, best iteration is:
[17]	valid_0's rmse: 26.4442
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[448]	valid_0's rmse: 26.4382


[I 2025-09-02 19:52:56,272] Trial 30 finished with value: 26.43821477128927 and parameters: {'learning_rate': 0.010146262959817187, 'num_leaves': 106, 'max_depth': 3, 'feature_fraction': 0.8390625430773545, 'bagging_fraction': 0.9064651909885536, 'bagging_freq': 4, 'min_child_samples': 56, 'lambda_l1': 1.0035616944369364e-08, 'lambda_l2': 3.3964691191798264e-05, 'min_split_gain': 0.32667372792990435, 'reg_alpha': 0.6759193998482723, 'reg_lambda': 0.0003498506567615115}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[224]	valid_0's rmse: 26.4389


[I 2025-09-02 19:53:01,268] Trial 31 finished with value: 26.438872817766573 and parameters: {'learning_rate': 0.012622459508644513, 'num_leaves': 104, 'max_depth': 3, 'feature_fraction': 0.8548888712768548, 'bagging_fraction': 0.9438630457680419, 'bagging_freq': 4, 'min_child_samples': 57, 'lambda_l1': 1.6063499394496429e-07, 'lambda_l2': 2.6556534470497504e-05, 'min_split_gain': 0.2109707410152793, 'reg_alpha': 0.7238885990114885, 'reg_lambda': 0.000226117284814018}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[250]	valid_0's rmse: 26.4386


[I 2025-09-02 19:53:06,501] Trial 32 finished with value: 26.438583838730928 and parameters: {'learning_rate': 0.01415399477430974, 'num_leaves': 107, 'max_depth': 3, 'feature_fraction': 0.9082194607516996, 'bagging_fraction': 0.9040315191868005, 'bagging_freq': 4, 'min_child_samples': 70, 'lambda_l1': 3.045548323827822e-08, 'lambda_l2': 1.8571770583394173e-06, 'min_split_gain': 0.11850013446132945, 'reg_alpha': 0.7236483472916534, 'reg_lambda': 0.0015352464928589358}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[291]	valid_0's rmse: 26.4387


[I 2025-09-02 19:53:14,301] Trial 33 finished with value: 26.438693499177482 and parameters: {'learning_rate': 0.010049439575384869, 'num_leaves': 103, 'max_depth': 4, 'feature_fraction': 0.9011814992488584, 'bagging_fraction': 0.9044016943829759, 'bagging_freq': 3, 'min_child_samples': 73, 'lambda_l1': 1.080103627191833e-08, 'lambda_l2': 4.184398294629964e-07, 'min_split_gain': 0.332209450878898, 'reg_alpha': 1.682859963136885, 'reg_lambda': 0.0011948146984762113}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[156]	valid_0's rmse: 26.4391


[I 2025-09-02 19:53:18,978] Trial 34 finished with value: 26.439091287381764 and parameters: {'learning_rate': 0.015163800733549348, 'num_leaves': 65, 'max_depth': 4, 'feature_fraction': 0.8736040513434224, 'bagging_fraction': 0.9038142593430372, 'bagging_freq': 4, 'min_child_samples': 71, 'lambda_l1': 3.380841880885241e-08, 'lambda_l2': 3.261424259773896e-06, 'min_split_gain': 0.17213841099424823, 'reg_alpha': 0.46544880486406626, 'reg_lambda': 5.561232387476628e-06}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[176]	valid_0's rmse: 26.4389


[I 2025-09-02 19:53:24,810] Trial 35 finished with value: 26.438859625563268 and parameters: {'learning_rate': 0.013525053854601314, 'num_leaves': 89, 'max_depth': 5, 'feature_fraction': 0.9439462207078348, 'bagging_fraction': 0.8581577541944614, 'bagging_freq': 4, 'min_child_samples': 80, 'lambda_l1': 1.0759377736050062e-08, 'lambda_l2': 3.783771490620269e-05, 'min_split_gain': 0.4383064792180552, 'reg_alpha': 3.1053486909782473, 'reg_lambda': 0.00012748877796760205}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[108]	valid_0's rmse: 26.4389


[I 2025-09-02 19:53:28,801] Trial 36 finished with value: 26.438886639153946 and parameters: {'learning_rate': 0.022749307080516878, 'num_leaves': 95, 'max_depth': 4, 'feature_fraction': 0.8280550724813982, 'bagging_fraction': 0.92612741403413, 'bagging_freq': 3, 'min_child_samples': 57, 'lambda_l1': 2.940479454052003e-07, 'lambda_l2': 8.199789365364281e-07, 'min_split_gain': 0.13332001142259192, 'reg_alpha': 0.2666951481761216, 'reg_lambda': 0.0004223993182938088}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[106]	valid_0's rmse: 26.4396


[I 2025-09-02 19:53:35,345] Trial 37 finished with value: 26.439584892439832 and parameters: {'learning_rate': 0.0124003352347684, 'num_leaves': 120, 'max_depth': 11, 'feature_fraction': 0.7822279922145247, 'bagging_fraction': 0.9978891427117148, 'bagging_freq': 4, 'min_child_samples': 67, 'lambda_l1': 7.584756812848888e-07, 'lambda_l2': 1.1374126649294947e-08, 'min_split_gain': 0.045882474613720595, 'reg_alpha': 0.04239701119415302, 'reg_lambda': 2.7221456472592326e-05}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[126]	valid_0's rmse: 26.4389


[I 2025-09-02 19:53:38,385] Trial 38 finished with value: 26.438864834126438 and parameters: {'learning_rate': 0.025394772849477668, 'num_leaves': 165, 'max_depth': 3, 'feature_fraction': 0.7119355528676721, 'bagging_fraction': 0.7359865503386075, 'bagging_freq': 5, 'min_child_samples': 45, 'lambda_l1': 4.8155485933985124e-08, 'lambda_l2': 7.142560836338786e-06, 'min_split_gain': 0.23681136937753977, 'reg_alpha': 6.299770113877059, 'reg_lambda': 0.0029279954246978625}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[99]	valid_0's rmse: 26.4383


[I 2025-09-02 19:53:43,371] Trial 39 finished with value: 26.438344413306385 and parameters: {'learning_rate': 0.019138341378855544, 'num_leaves': 72, 'max_depth': 10, 'feature_fraction': 0.9278541722998027, 'bagging_fraction': 0.8042218819752288, 'bagging_freq': 4, 'min_child_samples': 86, 'lambda_l1': 0.3498683445614373, 'lambda_l2': 2.2841072508123298e-07, 'min_split_gain': 0.16607315507608184, 'reg_alpha': 0.07833999348901144, 'reg_lambda': 1.5789320258004434e-06}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[59]	valid_0's rmse: 26.4388


[I 2025-09-02 19:53:47,345] Trial 40 finished with value: 26.438750426296064 and parameters: {'learning_rate': 0.018930477306397308, 'num_leaves': 72, 'max_depth': 11, 'feature_fraction': 0.8554500947220065, 'bagging_fraction': 0.8052001313151048, 'bagging_freq': 4, 'min_child_samples': 88, 'lambda_l1': 0.2798066889081728, 'lambda_l2': 9.17223358492776e-08, 'min_split_gain': 0.6731032305673865, 'reg_alpha': 0.028123737841678743, 'reg_lambda': 1.6351036953943516e-07}. Best is trial 30 with value: 26.43821477128927.


Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:53:51,086] Trial 41 finished with value: 26.438330522183865 and parameters: {'learning_rate': 0.01925442998080828, 'num_leaves': 59, 'max_depth': 10, 'feature_fraction': 0.9301435179851428, 'bagging_fraction': 0.8277168837142247, 'bagging_freq': 4, 'min_child_samples': 95, 'lambda_l1': 2.0473353670608923, 'lambda_l2': 1.1623244708179212e-06, 'min_split_gain': 0.13546321409385065, 'reg_alpha': 0.11001430526709537, 'reg_lambda': 1.3367903705299073e-06}. Best is trial 30 with value: 26.43821477128927.


Early stopping, best iteration is:
[60]	valid_0's rmse: 26.4383
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[152]	valid_0's rmse: 26.4376


[I 2025-09-02 19:53:56,518] Trial 42 finished with value: 26.437643839399865 and parameters: {'learning_rate': 0.01919518276769623, 'num_leaves': 42, 'max_depth': 10, 'feature_fraction': 0.9304448434284941, 'bagging_fraction': 0.8312972997447587, 'bagging_freq': 3, 'min_child_samples': 98, 'lambda_l1': 3.291921386254803, 'lambda_l2': 2.0697079321421873e-07, 'min_split_gain': 0.1553537044053227, 'reg_alpha': 0.0633927938913678, 'reg_lambda': 1.0863293389591826e-06}. Best is trial 42 with value: 26.437643839399865.


Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:54:00,301] Trial 43 finished with value: 26.439156970586918 and parameters: {'learning_rate': 0.027506816016732237, 'num_leaves': 47, 'max_depth': 9, 'feature_fraction': 0.996198604428167, 'bagging_fraction': 0.8696371483466189, 'bagging_freq': 2, 'min_child_samples': 99, 'lambda_l1': 1.5889666418528743, 'lambda_l2': 0.00011120303930879054, 'min_split_gain': 0.25495741524061416, 'reg_alpha': 0.18309025729521272, 'reg_lambda': 6.213321763788111e-08}. Best is trial 42 with value: 26.437643839399865.


Early stopping, best iteration is:
[55]	valid_0's rmse: 26.4392
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:54:03,033] Trial 44 finished with value: 26.438177892706154 and parameters: {'learning_rate': 0.036057153211485136, 'num_leaves': 32, 'max_depth': 10, 'feature_fraction': 0.8217853446445009, 'bagging_fraction': 0.8283285874937051, 'bagging_freq': 3, 'min_child_samples': 94, 'lambda_l1': 9.82114378598845, 'lambda_l2': 8.584411181951195e-06, 'min_split_gain': 0.06203953840030616, 'reg_alpha': 2.0296031725602264, 'reg_lambda': 2.169681574224377e-07}. Best is trial 42 with value: 26.437643839399865.


Early stopping, best iteration is:
[46]	valid_0's rmse: 26.4382
Training until validation scores don't improve for 50 rounds


[I 2025-09-02 19:54:04,942] Trial 45 finished with value: 26.438648566342206 and parameters: {'learning_rate': 0.03601227316695661, 'num_leaves': 42, 'max_depth': 12, 'feature_fraction': 0.8823234613674814, 'bagging_fraction': 0.8328162446993661, 'bagging_freq': 1, 'min_child_samples': 94, 'lambda_l1': 8.895642128728245, 'lambda_l2': 6.883749062741597e-06, 'min_split_gain': 0.05339730910912267, 'reg_alpha': 1.4087685661244994e-05, 'reg_lambda': 4.1493889038704676e-07}. Best is trial 42 with value: 26.437643839399865.


Early stopping, best iteration is:
[33]	valid_0's rmse: 26.4386
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[201]	valid_0's rmse: 26.4376


[I 2025-09-02 19:54:11,252] Trial 46 finished with value: 26.437615683345495 and parameters: {'learning_rate': 0.01125640881199085, 'num_leaves': 31, 'max_depth': 9, 'feature_fraction': 0.8375482070888639, 'bagging_fraction': 0.7904281065979449, 'bagging_freq': 3, 'min_child_samples': 92, 'lambda_l1': 2.1267912585010884, 'lambda_l2': 0.0010722372697166448, 'min_split_gain': 0.045034833982646405, 'reg_alpha': 2.70071408461679, 'reg_lambda': 1.0567766942564716e-05}. Best is trial 46 with value: 26.437615683345495.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[156]	valid_0's rmse: 26.4379


[I 2025-09-02 19:54:16,748] Trial 47 finished with value: 26.437927404397556 and parameters: {'learning_rate': 0.011274190660006022, 'num_leaves': 32, 'max_depth': 8, 'feature_fraction': 0.8419216428619188, 'bagging_fraction': 0.7893755953743498, 'bagging_freq': 3, 'min_child_samples': 91, 'lambda_l1': 4.21744569306294, 'lambda_l2': 0.00138247160104643, 'min_split_gain': 0.027855039771058174, 'reg_alpha': 9.96847527644995, 'reg_lambda': 9.00164026557489e-06}. Best is trial 46 with value: 26.437615683345495.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[158]	valid_0's rmse: 26.4384


[I 2025-09-02 19:54:22,532] Trial 48 finished with value: 26.43838783424946 and parameters: {'learning_rate': 0.011215636726196954, 'num_leaves': 34, 'max_depth': 8, 'feature_fraction': 0.8260364702903364, 'bagging_fraction': 0.7625711395315482, 'bagging_freq': 3, 'min_child_samples': 89, 'lambda_l1': 3.474639330107848, 'lambda_l2': 0.0008320983671211298, 'min_split_gain': 0.19502568133867274, 'reg_alpha': 2.3445748754199203, 'reg_lambda': 7.785620212593411e-06}. Best is trial 46 with value: 26.437615683345495.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[109]	valid_0's rmse: 26.4383


[I 2025-09-02 19:54:28,205] Trial 49 finished with value: 26.438349243798026 and parameters: {'learning_rate': 0.011556614280545358, 'num_leaves': 54, 'max_depth': 9, 'feature_fraction': 0.8466619949292421, 'bagging_fraction': 0.7782159371910851, 'bagging_freq': 2, 'min_child_samples': 80, 'lambda_l1': 0.6750029922581606, 'lambda_l2': 8.837739435286319e-05, 'min_split_gain': 0.027348602305910172, 'reg_alpha': 8.866509416948462, 'reg_lambda': 1.6929422816928325e-08}. Best is trial 46 with value: 26.437615683345495.


Best RMSE: 26.437616
Best parameters found:
  learning_rate: 0.01125640881199085
  num_leaves: 31
  max_depth: 9
  feature_fraction: 0.8375482070888639
  bagging_fraction: 0.7904281065979449
  bagging_freq: 3
  min_child_samples: 92
  lambda_l1: 2.1267912585010884
  lambda_l2: 0.0010722372697166448
  min_split_gain: 0.045034833982646405
  reg_alpha: 2.70071408461679
  reg_lambda: 1.0567766942564716e-05

Training final model with best parameters...
Training until validation scores don't improve for 50 rounds
[50]	train's rmse: 26.4583	val's rmse: 26.4402
[100]	train's rmse: 26.4476	val's rmse: 26.4384
[150]	train's rmse: 26.4388	val's rmse: 26.4381
[200]	train's rmse: 26.4304	val's rmse: 26.4378
[250]	train's rmse: 26.4224	val's rmse: 26.4381
Early stopping, best iteration is:
[201]	train's rmse: 26.4303	val's rmse: 26.4378

Model training complete! Use 'final_model' for predictions.
Example: predictions = final_model.predict(X_test)


In [13]:
# Make predictions
train_preds = final_model.predict(X_train, num_iteration=final_model.best_iteration)
val_preds = final_model.predict(X_val, num_iteration=final_model.best_iteration)

# Calculate final scores
train_rmse = mean_squared_error(y_train, train_preds, squared=False)
val_rmse = mean_squared_error(y_val, val_preds, squared=False)

print(f"\nFinal Results:")
print(f"Training RMSE: {train_rmse:.6f}")
print(f"Validation RMSE: {val_rmse:.6f}")


Final Results:
Training RMSE: 26.430259
Validation RMSE: 26.437803


In [14]:
# best_params = study.best_params
# best_params.update({
#     "objective": "regression",
#     "metric": "rmse",
#     "boosting_type": "gbdt",
#     "device": "gpu"
# })

# final_model = lgb.train(
#     best_params,
#     lgb.Dataset(X_train, label=y_train),
#     valid_sets=[lgb.Dataset(X_val, label=y_val)],
#     num_boost_round=1000,
#     callbacks=[
#         early_stopping(stopping_rounds=100),
#         log_evaluation(period=100)
#     ]
# )


In [15]:
preds_test = final_model.predict(X_test, num_iteration=final_model.best_iteration)

In [16]:
submission = pd.DataFrame({
    "id": test_data["id"],         
    "BeatsPerMinute": preds_test             
})

In [17]:
# submission.to_csv("sample_submission.csv", index=False)
# print("✅ sample_submission.csv created!")

In [18]:
submission.head()

Unnamed: 0,id,BeatsPerMinute
0,524164,118.973859
1,524165,119.077959
2,524166,119.669772
3,524167,119.429488
4,524168,119.297035


In [19]:
sample.head()

Unnamed: 0,id,BeatsPerMinute
0,524164,119.035
1,524165,119.035
2,524166,119.035
3,524167,119.035
4,524168,119.035


In [20]:
# final_model.save_model('model.h5')