In [1]:
# Kaggle Playground Series - BPM Prediction with Optuna + Stacking
# Prem's Optimized Pipeline 🚀

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.preprocessing import RobustScaler
import lightgbm as lgb
import xgboost as xgb
import optuna
import warnings
warnings.filterwarnings("ignore")

# ==============================
# 1. Load Data
# ==============================
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

TARGET = "BeatsPerMinute"
ID_COL = "id"

print("Train shape:", train.shape, " Test shape:", test.shape)

# ==============================
# 2. Feature Engineering
# ==============================
def feature_engineering(train, test):
    feature_cols = [c for c in train.columns if c not in [ID_COL, TARGET]]
    combined = pd.concat([train[feature_cols], test[feature_cols]], axis=0)

    # Stats
    combined["f_sum"] = combined[feature_cols].sum(axis=1)
    combined["f_mean"] = combined[feature_cols].mean(axis=1)
    combined["f_std"] = combined[feature_cols].std(axis=1)
    combined["f_min"] = combined[feature_cols].min(axis=1)
    combined["f_max"] = combined[feature_cols].max(axis=1)
    combined["f_range"] = combined["f_max"] - combined["f_min"]

    train_f = combined.iloc[:len(train)].copy()
    test_f = combined.iloc[len(train):].copy()

    train_f[ID_COL] = train[ID_COL].values
    test_f[ID_COL] = test[ID_COL].values
    train_f[TARGET] = train[TARGET].values
    return train_f, test_f

train_f, test_f = feature_engineering(train, test)

X = train_f.drop([ID_COL, TARGET], axis=1)
y = train_f[TARGET]
X_test = test_f.drop([ID_COL], axis=1)

scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

# ==============================
# 3. Optuna Tuning - LightGBM
# ==============================
def objective_lgb(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 500, 2000),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 0.05),
        "num_leaves": trial.suggest_int("num_leaves", 31, 128),
        "max_depth": trial.suggest_int("max_depth", -1, 12),
        "subsample": trial.suggest_uniform("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.6, 1.0),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-3, 10),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-3, 10),
        "random_state": 42,
        "device": "gpu"
    }
    model = lgb.LGBMRegressor(**params)
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    scores = cross_val_score(model, X_scaled, y, cv=kf, scoring="neg_root_mean_squared_error")
    return -scores.mean()

print("🔎 Tuning LightGBM with Optuna...")
study_lgb = optuna.create_study(direction="minimize")
study_lgb.optimize(objective_lgb, n_trials=20)  # increase trials if more time
print("Best LGB Params:", study_lgb.best_params)

# ==============================
# 4. Optuna Tuning - XGBoost
# ==============================
def objective_xgb(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 500, 2000),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 0.05),
        "max_depth": trial.suggest_int("max_depth", 4, 12),
        "subsample": trial.suggest_uniform("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.6, 1.0),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-3, 10),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-3, 10),
        "random_state": 42,
        "tree_method": "gpu_hist",
        "predictor": "gpu_predictor",
        "gpu_id": 0
    }
    model = xgb.XGBRegressor(**params)
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    scores = cross_val_score(model, X_scaled, y, cv=kf, scoring="neg_root_mean_squared_error")
    return -scores.mean()

print("🔎 Tuning XGBoost with Optuna...")
study_xgb = optuna.create_study(direction="minimize")
study_xgb.optimize(objective_xgb, n_trials=20)  # increase trials if more time
print("Best XGB Params:", study_xgb.best_params)

# ==============================
# 5. Train Final Models
# ==============================
best_lgb = lgb.LGBMRegressor(**study_lgb.best_params)
best_xgb = xgb.XGBRegressor(**study_xgb.best_params)

ridge = Ridge(alpha=1.0)

best_lgb.fit(X_scaled, y)
best_xgb.fit(X_scaled, y)
ridge.fit(X_scaled, y)

pred_lgb = best_lgb.predict(X_test_scaled)
pred_xgb = best_xgb.predict(X_test_scaled)
pred_ridge = ridge.predict(X_test_scaled)

# ==============================
# 6. Ensemble / Stacking
# ==============================
# Simple average
ensemble_pred = (pred_lgb + pred_xgb + pred_ridge) / 3

# Weighted (more weight to LGB + XGB)
weighted_pred = (0.4 * pred_lgb + 0.4 * pred_xgb + 0.2 * pred_ridge)

# ==============================
# 7. Submissions
# ==============================
def make_submission(preds, filename):
    sub = pd.DataFrame({ID_COL: test[ID_COL], TARGET: preds})
    sub.to_csv(filename, index=False)
    print("Saved:", filename, "| Shape:", sub.shape)

make_submission(pred_lgb, "submission_lgb.csv")
make_submission(pred_xgb, "submission_xgb.csv")
make_submission(ensemble_pred, "submission_ensemble.csv")
make_submission(weighted_pred, "submission_weighted.csv")

print("✅ All submissions saved!")


Train shape: (524164, 11)  Test shape: (174722, 10)


[I 2025-09-12 09:00:57,127] A new study created in memory with name: no-name-3ab63123-e7b9-46e5-bdd9-4307264743e1


🔎 Tuning LightGBM with Optuna...
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.009964 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[Lig

[I 2025-09-12 09:08:22,593] Trial 0 finished with value: 26.4795130138149 and parameters: {'n_estimators': 1869, 'learning_rate': 0.007404062443604787, 'num_leaves': 86, 'max_depth': -1, 'subsample': 0.9836585346416793, 'colsample_bytree': 0.8708470304656835, 'reg_alpha': 0.004935218876011492, 'reg_lambda': 0.004792618491096358}. Best is trial 0 with value: 26.4795130138149.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.010555 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:16:32,811] Trial 1 finished with value: 26.540883300299523 and parameters: {'n_estimators': 1712, 'learning_rate': 0.020775727037593124, 'num_leaves': 114, 'max_depth': 9, 'subsample': 0.8307595025930496, 'colsample_bytree': 0.7500910723551714, 'reg_alpha': 0.0014076754782626115, 'reg_lambda': 0.001553202838647531}. Best is trial 0 with value: 26.4795130138149.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.011996 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:18:34,526] Trial 2 finished with value: 26.472933505685518 and parameters: {'n_estimators': 824, 'learning_rate': 0.016393090045583633, 'num_leaves': 66, 'max_depth': 12, 'subsample': 0.7637510826277338, 'colsample_bytree': 0.6055073874643276, 'reg_alpha': 0.8180645600337956, 'reg_lambda': 0.019076797189885482}. Best is trial 2 with value: 26.472933505685518.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.007824 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:26:00,018] Trial 3 finished with value: 26.47833605996664 and parameters: {'n_estimators': 1693, 'learning_rate': 0.005716619805130684, 'num_leaves': 106, 'max_depth': 8, 'subsample': 0.8210578648940893, 'colsample_bytree': 0.6825518792620391, 'reg_alpha': 0.08282637426907959, 'reg_lambda': 0.001358745958897917}. Best is trial 2 with value: 26.472933505685518.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.012287 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:28:10,565] Trial 4 finished with value: 26.474628490276032 and parameters: {'n_estimators': 1508, 'learning_rate': 0.010790158638116151, 'num_leaves': 87, 'max_depth': 5, 'subsample': 0.6506368045171381, 'colsample_bytree': 0.7355612736646553, 'reg_alpha': 0.0012925909864397124, 'reg_lambda': 0.2516478716352486}. Best is trial 2 with value: 26.472933505685518.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006541 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:28:40,948] Trial 5 finished with value: 26.462050591076842 and parameters: {'n_estimators': 1282, 'learning_rate': 0.008810217915994916, 'num_leaves': 51, 'max_depth': 2, 'subsample': 0.61272127508332, 'colsample_bytree': 0.8876758250926478, 'reg_alpha': 0.10328928642250527, 'reg_lambda': 0.10810041439594459}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.009291 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:32:17,707] Trial 6 finished with value: 26.500752800578113 and parameters: {'n_estimators': 1983, 'learning_rate': 0.020295371721041924, 'num_leaves': 45, 'max_depth': 10, 'subsample': 0.8224390893208527, 'colsample_bytree': 0.8333437390820955, 'reg_alpha': 0.005879496840179276, 'reg_lambda': 0.019833184345685727}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.007683 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:36:39,938] Trial 7 finished with value: 26.48344077847587 and parameters: {'n_estimators': 1654, 'learning_rate': 0.011487321813088694, 'num_leaves': 65, 'max_depth': 7, 'subsample': 0.7833083943559405, 'colsample_bytree': 0.6095075556787575, 'reg_alpha': 3.347968079592676, 'reg_lambda': 9.566329707053715}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.008391 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:37:35,613] Trial 8 finished with value: 26.491078528176377 and parameters: {'n_estimators': 500, 'learning_rate': 0.049907676429629254, 'num_leaves': 44, 'max_depth': 7, 'subsample': 0.646816790189363, 'colsample_bytree': 0.8270294735788075, 'reg_alpha': 5.203501344444682, 'reg_lambda': 1.011877249500007}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006724 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:40:31,733] Trial 9 finished with value: 26.48061649816405 and parameters: {'n_estimators': 1318, 'learning_rate': 0.013773911176519882, 'num_leaves': 51, 'max_depth': 9, 'subsample': 0.7288777462917719, 'colsample_bytree': 0.8464331473290656, 'reg_alpha': 0.13487081467158374, 'reg_lambda': 0.19563397338291746}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006919 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:40:54,416] Trial 10 finished with value: 26.464237689887398 and parameters: {'n_estimators': 1027, 'learning_rate': 0.0340085174508243, 'num_leaves': 32, 'max_depth': 2, 'subsample': 0.9346681297903612, 'colsample_bytree': 0.9720833585106231, 'reg_alpha': 0.06788283268228162, 'reg_lambda': 1.663069103791625}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006463 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:41:17,733] Trial 11 finished with value: 26.464401855600645 and parameters: {'n_estimators': 1046, 'learning_rate': 0.03479737244961921, 'num_leaves': 31, 'max_depth': 2, 'subsample': 0.9460299883038077, 'colsample_bytree': 0.9952925586894777, 'reg_alpha': 0.06943567386426264, 'reg_lambda': 2.284564563287769}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006728 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:41:50,232] Trial 12 finished with value: 26.4686208178981 and parameters: {'n_estimators': 1039, 'learning_rate': 0.029611393326666426, 'num_leaves': 31, 'max_depth': 3, 'subsample': 0.920200466021827, 'colsample_bytree': 0.9628579821667195, 'reg_alpha': 0.30495221931743216, 'reg_lambda': 0.9092471580853867}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006579 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:45:22,937] Trial 13 finished with value: 26.472055129196733 and parameters: {'n_estimators': 1293, 'learning_rate': 0.00873743530140029, 'num_leaves': 61, 'max_depth': 0, 'subsample': 0.8929369059262139, 'colsample_bytree': 0.9223579725151996, 'reg_alpha': 0.038579213903205234, 'reg_lambda': 0.03666637379648329}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.007108 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:45:44,576] Trial 14 finished with value: 26.463640382630768 and parameters: {'n_estimators': 972, 'learning_rate': 0.030952709304247795, 'num_leaves': 128, 'max_depth': 2, 'subsample': 0.6903510809298072, 'colsample_bytree': 0.9287588640146315, 'reg_alpha': 0.021886465207323694, 'reg_lambda': 8.406017806843424}. Best is trial 5 with value: 26.462050591076842.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.007844 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:46:19,432] Trial 15 finished with value: 26.460497966138263 and parameters: {'n_estimators': 706, 'learning_rate': 0.005776993529392393, 'num_leaves': 126, 'max_depth': 4, 'subsample': 0.6028703041590714, 'colsample_bytree': 0.9078039064315219, 'reg_alpha': 0.015729200269526917, 'reg_lambda': 7.488328608678389}. Best is trial 15 with value: 26.460497966138263.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006871 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:47:02,842] Trial 16 finished with value: 26.46104506138651 and parameters: {'n_estimators': 530, 'learning_rate': 0.005542154217944269, 'num_leaves': 77, 'max_depth': 5, 'subsample': 0.6153194924685884, 'colsample_bytree': 0.8829860151974651, 'reg_alpha': 0.014884173233152222, 'reg_lambda': 0.07869517235273304}. Best is trial 15 with value: 26.460497966138263.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.007008 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:47:45,146] Trial 17 finished with value: 26.460871418983555 and parameters: {'n_estimators': 513, 'learning_rate': 0.0052222727647582325, 'num_leaves': 99, 'max_depth': 5, 'subsample': 0.6193685624013062, 'colsample_bytree': 0.7871276091943321, 'reg_alpha': 0.008764465213730002, 'reg_lambda': 0.061057711013333925}. Best is trial 15 with value: 26.460497966138263.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.006704 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:48:21,015] Trial 18 finished with value: 26.46021657590045 and parameters: {'n_estimators': 697, 'learning_rate': 0.005103858315159509, 'num_leaves': 101, 'max_depth': 4, 'subsample': 0.6876808700816466, 'colsample_bytree': 0.7746549025608809, 'reg_alpha': 0.006538685924732687, 'reg_lambda': 0.006847760853833475}. Best is trial 18 with value: 26.46021657590045.


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349442, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (5.33 MB) transferred to GPU in 0.007123 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 119.070823
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 349443, number of used features: 15
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram b

[I 2025-09-12 09:48:58,164] Trial 19 finished with value: 26.46114935164555 and parameters: {'n_estimators': 745, 'learning_rate': 0.008108045588250706, 'num_leaves': 126, 'max_depth': 4, 'subsample': 0.6986170728021649, 'colsample_bytree': 0.6911659500432932, 'reg_alpha': 0.002890829559401973, 'reg_lambda': 0.006455865963287509}. Best is trial 18 with value: 26.46021657590045.
[I 2025-09-12 09:48:58,173] A new study created in memory with name: no-name-27be1c32-e88d-4451-b45e-320ea2a35e3f


Best LGB Params: {'n_estimators': 697, 'learning_rate': 0.005103858315159509, 'num_leaves': 101, 'max_depth': 4, 'subsample': 0.6876808700816466, 'colsample_bytree': 0.7746549025608809, 'reg_alpha': 0.006538685924732687, 'reg_lambda': 0.006847760853833475}
🔎 Tuning XGBoost with Optuna...


[I 2025-09-12 09:50:18,316] Trial 0 finished with value: 26.50786543849586 and parameters: {'n_estimators': 994, 'learning_rate': 0.01031753874382208, 'max_depth': 10, 'subsample': 0.8921399028410254, 'colsample_bytree': 0.6659336482594874, 'reg_alpha': 7.757212017816225, 'reg_lambda': 0.11129745922013305}. Best is trial 0 with value: 26.50786543849586.
[I 2025-09-12 09:51:14,136] Trial 1 finished with value: 26.47822852795136 and parameters: {'n_estimators': 1442, 'learning_rate': 0.006058910093302807, 'max_depth': 8, 'subsample': 0.6921138239192328, 'colsample_bytree': 0.6883206029055325, 'reg_alpha': 0.05608384347195393, 'reg_lambda': 9.843790137742001}. Best is trial 1 with value: 26.47822852795136.
[I 2025-09-12 09:52:00,032] Trial 2 finished with value: 26.474603422909095 and parameters: {'n_estimators': 1190, 'learning_rate': 0.0053394425040516474, 'max_depth': 8, 'subsample': 0.6334545289707934, 'colsample_bytree': 0.7370918053947391, 'reg_alpha': 0.0026894768424428764, 'reg_la

Best XGB Params: {'n_estimators': 771, 'learning_rate': 0.0050555463823753295, 'max_depth': 4, 'subsample': 0.6132962536537535, 'colsample_bytree': 0.8921445288371885, 'reg_alpha': 3.7929644590730045, 'reg_lambda': 0.008056205309425232}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.030751 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 524164, number of used features: 15
[LightGBM] [Info] Start training from score 119.034899
Saved: submission_lgb.csv | Shape: (174722, 2)
Saved: submission_xgb.csv | Shape: (174722, 2)
Saved: submission_ensemble.csv | Shape: (174722, 2)
Saved: submission_weighted.csv | Shape: (174722, 2)
✅ All submissions saved!


In [3]:
# Prem's Top-Solution Notebook — Strong ensembling + OOF stacking + multi-seed averaging
# Fixed for LightGBM early_stopping issue
import os
import gc
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import RobustScaler
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostRegressor
from lightgbm import early_stopping
import optuna
import warnings
warnings.filterwarnings("ignore")

SEED_LIST = [42, 7, 2023]   # multiple seeds for averaging
N_FOLDS = 5
TARGET = "BeatsPerMinute"
ID_COL = "id"

# ------------------------------
# 1) Load data
# ------------------------------
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
print("Train:", train.shape, "Test:", test.shape)

# ------------------------------
# 2) Strong Feature Engineering
# ------------------------------
def create_features(df):
    df = df.copy()
    exclude = [ID_COL, TARGET] if TARGET in df.columns else [ID_COL]
    cols = [c for c in df.columns if c not in exclude]

    # statistical
    df["f_sum"] = df[cols].sum(axis=1)
    df["f_mean"] = df[cols].mean(axis=1)
    df["f_std"] = df[cols].std(axis=1)
    df["f_min"] = df[cols].min(axis=1)
    df["f_max"] = df[cols].max(axis=1)
    df["f_range"] = df["f_max"] - df["f_min"]

    # top variance cols interactions
    var_rank = df[cols].var().sort_values(ascending=False).index.tolist()[:6]
    for i in range(len(var_rank)):
        for j in range(i+1, len(var_rank)):
            a, b = var_rank[i], var_rank[j]
            df[f"{a}_x_{b}"] = df[a] * df[b]
            df[f"{a}_div_{b}"] = df[a] / (df[b] + 1e-6)

    # duration
    if "TrackDurationMs" in df.columns:
        df["TrackDurationMin"] = df["TrackDurationMs"] / 60000.0
        df["log_TrackDurationMin"] = np.log1p(df["TrackDurationMin"])

    # loudness
    if "AudioLoudness" in df.columns:
        df["abs_loudness"] = df["AudioLoudness"].abs()
        df["log_loudness"] = np.log1p(df["abs_loudness"])

    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(df.median(numeric_only=True), inplace=True)
    return df

train_f = create_features(train)
test_f = create_features(test)
features = [c for c in train_f.columns if c not in [ID_COL, TARGET]]
print("Feature count:", len(features))

# ------------------------------
# 3) Model parameter helpers
# ------------------------------
def get_lgb_params(seed=42):
    return {
        "objective":"regression",
        "metric":"rmse",
        "verbosity":-1,
        "boosting_type":"gbdt",
        "n_jobs":-1,
        "random_state":seed,
        "num_leaves":128,
        "learning_rate":0.01,
        "n_estimators":3000,
        "colsample_bytree":0.7,
        "subsample":0.8,
        "reg_alpha":0.1,
        "reg_lambda":0.1,
        "min_child_samples":20
    }

def get_xgb_params(seed=42):
    return {
        "objective":"reg:squarederror",
        "verbosity":0,
        "n_estimators":2000,
        "learning_rate":0.01,
        "max_depth":8,
        "subsample":0.8,
        "colsample_bytree":0.7,
        "reg_alpha":0.1,
        "reg_lambda":0.2,
        "random_state":seed,
        "tree_method":"gpu_hist"
    }

def get_cat_params(seed=42):
    return {
        "iterations":2000,
        "learning_rate":0.01,
        "depth":8,
        "l2_leaf_reg":3,
        "random_seed":seed,
        "verbose":0,
        "task_type":"GPU"
    }

# ------------------------------
# 4) OOF trainer
# ------------------------------
def train_oof(model_name, X, y, X_test, seed):
    oof = np.zeros(len(X))
    preds_test = np.zeros(len(X_test))
    kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=seed)

    for fold, (tr_idx, val_idx) in enumerate(kf.split(X, y)):
        X_tr, X_val = X[tr_idx], X[val_idx]
        y_tr, y_val = y[tr_idx], y[val_idx]

        if model_name == "lgb":
            params = get_lgb_params(seed=seed)
            model = lgb.LGBMRegressor(**params)
            model.fit(
                X_tr, y_tr,
                eval_set=[(X_val, y_val)],
                callbacks=[early_stopping(200)],
                verbose=0
            )
            oof[val_idx] = model.predict(X_val)
            preds_test += model.predict(X_test) / N_FOLDS

        elif model_name == "xgb":
            params = get_xgb_params(seed=seed)
            model = xgb.XGBRegressor(**params)
            model.fit(
                X_tr, y_tr,
                eval_set=[(X_val, y_val)],
                early_stopping_rounds=200,
                verbose=False
            )
            oof[val_idx] = model.predict(X_val)
            preds_test += model.predict(X_test) / N_FOLDS

        elif model_name == "cat":
            params = get_cat_params(seed=seed)
            model = CatBoostRegressor(**params)
            model.fit(X_tr, y_tr, eval_set=(X_val, y_val), use_best_model=True, verbose=0)
            oof[val_idx] = model.predict(X_val)
            preds_test += model.predict(X_test) / N_FOLDS

        gc.collect()

    score = np.sqrt(mean_squared_error(y, oof))
    return oof, preds_test, score

# ------------------------------
# 5) Data prep
# ------------------------------
X_all = train_f[features].values
y_all = train_f[TARGET].values
X_test_all = test_f[features].values

scaler = RobustScaler()
X_all = scaler.fit_transform(X_all)
X_test_all = scaler.transform(X_test_all)

# ------------------------------
# 6) Train base models with multiple seeds
# ------------------------------
oof_base_preds = {}
test_base_preds = {}
print("Starting multi-seed training...")

for seed in SEED_LIST:
    # LightGBM
    oof_lgb, test_lgb, score_lgb = train_oof("lgb", X_all, y_all, X_test_all, seed)
    print(f"Seed {seed} LGB RMSE OOF: {score_lgb:.5f}")
    oof_base_preds[f"lgb_s{seed}"] = oof_lgb
    test_base_preds[f"lgb_s{seed}"] = test_lgb

    # XGBoost
    oof_xgb, test_xgb, score_xgb = train_oof("xgb", X_all, y_all, X_test_all, seed)
    print(f"Seed {seed} XGB RMSE OOF: {score_xgb:.5f}")
    oof_base_preds[f"xgb_s{seed}"] = oof_xgb
    test_base_preds[f"xgb_s{seed}"] = test_xgb

    # CatBoost
    oof_cat, test_cat, score_cat = train_oof("cat", X_all, y_all, X_test_all, seed)
    print(f"Seed {seed} CAT RMSE OOF: {score_cat:.5f}")
    oof_base_preds[f"cat_s{seed}"] = oof_cat
    test_base_preds[f"cat_s{seed}"] = test_cat

# ------------------------------
# 7) Build stacked features
# ------------------------------
stack_train = np.vstack([oof_base_preds[k] for k in sorted(oof_base_preds.keys())]).T
stack_test = np.vstack([test_base_preds[k] for k in sorted(test_base_preds.keys())]).T
print("Stack train:", stack_train.shape, "Stack test:", stack_test.shape)

# ------------------------------
# 8) Meta-model training (LightGBM)
# ------------------------------
meta_oof = np.zeros(stack_train.shape[0])
meta_test = np.zeros(stack_test.shape[0])
kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED_LIST[0])

for tr_idx, val_idx in kf.split(stack_train, y_all):
    X_tr, X_val = stack_train[tr_idx], stack_train[val_idx]
    y_tr, y_val = y_all[tr_idx], y_all[val_idx]
    params = get_lgb_params(seed=SEED_LIST[0])
    model = lgb.LGBMRegressor(**params)
    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        callbacks=[early_stopping(150)],
        verbose=0
    )
    meta_oof[val_idx] = model.predict(X_val)
    meta_test += model.predict(stack_test) / N_FOLDS

meta_score = np.sqrt(mean_squared_error(y_all, meta_oof))
print("Meta OOF RMSE:", meta_score)

# ------------------------------
# 9) Final ensemble blend (Optuna)
# ------------------------------
lgb_oof_avg = np.mean([oof_base_preds[k] for k in oof_base_preds if k.startswith("lgb")], axis=0)
xgb_oof_avg = np.mean([oof_base_preds[k] for k in oof_base_preds if k.startswith("xgb")], axis=0)
cat_oof_avg = np.mean([oof_base_preds[k] for k in oof_base_preds if k.startswith("cat")], axis=0)

lgb_test_avg = np.mean([test_base_preds[k] for k in test_base_preds if k.startswith("lgb")], axis=0)
xgb_test_avg = np.mean([test_base_preds[k] for k in test_base_preds if k.startswith("xgb")], axis=0)
cat_test_avg = np.mean([test_base_preds[k] for k in test_base_preds if k.startswith("cat")], axis=0)

baseline_oof = 0.35*lgb_oof_avg + 0.35*xgb_oof_avg + 0.3*cat_oof_avg
baseline_test = 0.35*lgb_test_avg + 0.35*xgb_test_avg + 0.3*cat_test_avg

def objective_blend(trial):
    w1 = trial.suggest_uniform("w1", 0, 1)
    w2 = trial.suggest_uniform("w2", 0, 1)
    w3 = 1 - w1 - w2
    if w3 < 0: return 1e6
    blend = w1*baseline_oof + w2*meta_oof + w3*(0.33*lgb_oof_avg + 0.33*xgb_oof_avg + 0.34*cat_oof_avg)
    return np.sqrt(mean_squared_error(y_all, blend))

study = optuna.create_study(direction="minimize")
study.optimize(objective_blend, n_trials=200)
best = study.best_params
print("Best blend weights:", best)

w1, w2 = best["w1"], best["w2"]
w3 = 1 - w1 - w2
final_test = w1*baseline_test + w2*meta_test + w3*(0.33*lgb_test_avg + 0.33*xgb_test_avg + 0.34*cat_test_avg)

# ------------------------------
# 10) Save submissions
# ------------------------------
sub = pd.DataFrame({ID_COL: test[ID_COL], TARGET: final_test})
sub.to_csv("submission_final.csv", index=False)
print("Saved submission_final.csv ✅")


Train: (524164, 11) Test: (174722, 10)
Feature count: 49
Starting multi-seed training...


TypeError: LGBMRegressor.fit() got an unexpected keyword argument 'verbose'