In [1]:
# pip install optuna!

In [4]:
# Training Notebook: Retrain CatBoost with Optuna tuning
# Save as train_aqi_optuna.ipynb / train_aqi_optuna.py

import os
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from catboost import CatBoostRegressor, Pool
import optuna
import joblib
import matplotlib.pyplot as plt

# ----------------- CONFIG -----------------
DATA_PATH = "Preprocessed_Data_Final.xls"   # your file
TARGET_COL = "AQI"
FEATURES_SAVE = "feature_cols_new.pkl"
MODEL_SAVE = "CatBoost_Optimized.pkl"

RANDOM_SEED = 42
N_TRIALS = 50          # change to 100+ for better tuning (longer)
TS_SPLITS = 5          # number of time-splits for CV
EARLY_STOPPING_ROUNDS = 100
VERBOSE = 100

np.random.seed(RANDOM_SEED)

# ----------------- Utility: load data -----------------
def load_data(path=DATA_PATH):
    if not os.path.exists(path):
        raise FileNotFoundError(f"Training file not found: {path}")
    # Try csv first then excel
    try:
        df = pd.read_csv(path, parse_dates=["Datetime"])
    except Exception:
        df = pd.read_excel(path, engine="openpyxl")
    # Ensure Datetime column
    if "Datetime" in df.columns:
        df["Datetime"] = pd.to_datetime(df["Datetime"])
    else:
        raise ValueError("Data must have a 'Datetime' column.")
    return df

# ----------------- Define expected features -----------------
EXPECTED_FEATURES = [
    'StationId_enc','PM2.5','NO2','CO','SO2','O3','Temperature','DewPoint',
    'WindDirection','WindSpeed','Pressure','RelativeHumidity',
    'PM25_lag1','PM25_lag3','PM25_lag6','PM25_lag24',
    'NO2_lag1','O3_lag1',
    'PM25_roll3','PM25_roll6','PM25_roll12','PM25_roll24',
    'Hour_sin','Hour_cos',
    'Month_sin','Month_cos',
    'Pollution_Load','PM_Ratio','Temp_Humidity_Interaction','Wind_Inv',
    'Hour','DayOfWeek','IsWeekend','Month','Quarter','DayOfYear','Year',
    'Day_Monday','Day_Saturday','Day_Sunday','Day_Thursday','Day_Tuesday','Day_Wednesday',
    'Season_PostMonsoon','Season_Summer','Season_Winter'
]

# ----------------- Feature engineering helpers -----------------
def add_datetime_features(df):
    dt = df["Datetime"]
    df["Hour"] = dt.dt.hour
    df["DayOfWeek"] = dt.dt.weekday
    df["IsWeekend"] = (dt.dt.weekday >= 5).astype(int)
    df["Month"] = dt.dt.month
    df["Quarter"] = dt.dt.quarter
    df["DayOfYear"] = dt.dt.dayofyear
    df["Year"] = dt.dt.year
    df["Hour_sin"] = np.sin(2 * np.pi * df["Hour"] / 24)
    df["Hour_cos"] = np.cos(2 * np.pi * df["Hour"] / 24)
    df["Month_sin"] = np.sin(2 * np.pi * df["Month"] / 12)
    df["Month_cos"] = np.cos(2 * np.pi * df["Month"] / 12)

    # Day one-hot
    df["Day_Monday"] = (df["DayOfWeek"] == 0).astype(int)
    df["Day_Tuesday"] = (df["DayOfWeek"] == 1).astype(int)
    df["Day_Wednesday"] = (df["DayOfWeek"] == 2).astype(int)
    df["Day_Thursday"] = (df["DayOfWeek"] == 3).astype(int)
    df["Day_Friday"] = (df["DayOfWeek"] == 4).astype(int)
    df["Day_Saturday"] = (df["DayOfWeek"] == 5).astype(int)
    df["Day_Sunday"] = (df["DayOfWeek"] == 6).astype(int)

    # Seasons (approx)
    df["Season_Winter"] = df["Month"].isin([12,1,2]).astype(int)
    df["Season_Summer"] = df["Month"].isin([4,5,6]).astype(int)
    df["Season_PostMonsoon"] = df["Month"].isin([10,11]).astype(int)

    return df

def add_interactions(df):
    # safe casts
    df["PM2.5"] = pd.to_numeric(df.get("PM2.5", 0), errors="coerce").fillna(0)
    df["NO2"] = pd.to_numeric(df.get("NO2", 0), errors="coerce").fillna(0)
    df["O3"] = pd.to_numeric(df.get("O3", 0), errors="coerce").fillna(0)
    df["CO"] = pd.to_numeric(df.get("CO", 0), errors="coerce").fillna(0)
    df["SO2"] = pd.to_numeric(df.get("SO2", 0), errors="coerce").fillna(0)
    df["Temperature"] = pd.to_numeric(df.get("Temperature", 0), errors="coerce").fillna(0)
    df["RelativeHumidity"] = pd.to_numeric(df.get("RelativeHumidity", 0), errors="coerce").fillna(0)
    df["WindSpeed"] = pd.to_numeric(df.get("WindSpeed", 0), errors="coerce").fillna(0)

    df["Pollution_Load"] = df["PM2.5"] + df["NO2"] + df["CO"] + df["SO2"] + df["O3"]
    df["PM_Ratio"] = df["PM2.5"] / (df["NO2"] + df["O3"] + 1e-6)
    df["Temp_Humidity_Interaction"] = df["Temperature"] * df["RelativeHumidity"]
    df["Wind_Inv"] = 1.0 / (df["WindSpeed"].replace(0, np.nan) + 1e-6)
    df["Wind_Inv"] = df["Wind_Inv"].fillna(0)
    return df

def compute_lags_rolls(df, station_col="StationId", time_col="Datetime"):
    """
    Compute PM2.5 lags & rolls, NO2/O3 lag1 per station.
    Assumes df sorted by Datetime ascending.
    Works in-place and returns df.
    """
    df = df.sort_values([station_col, time_col])
    # We'll compute per-station
    lag_cols = ["PM25_lag1","PM25_lag3","PM25_lag6","PM25_lag24","NO2_lag1","O3_lag1"]
    roll_cols = ["PM25_roll3","PM25_roll6","PM25_roll12","PM25_roll24"]

    for sid, g in df.groupby(station_col):
        g = g.sort_values(time_col)
        pm = g["PM2.5"].astype(float).fillna(method="ffill").fillna(0)
        no2 = g["NO2"].astype(float).fillna(method="ffill").fillna(0)
        o3 = g["O3"].astype(float).fillna(method="ffill").fillna(0)

        idx = g.index

        # lags
        df.loc[idx, "PM25_lag1"] = pm.shift(1).values
        df.loc[idx, "PM25_lag3"] = pm.shift(3).values
        df.loc[idx, "PM25_lag6"] = pm.shift(6).values
        df.loc[idx, "PM25_lag24"] = pm.shift(24).values

        df.loc[idx, "NO2_lag1"] = no2.shift(1).values
        df.loc[idx, "O3_lag1"] = o3.shift(1).values

        # rolling means (min periods = 1 -> fallback)
        df.loc[idx, "PM25_roll3"] = pm.rolling(window=3, min_periods=1).mean().values
        df.loc[idx, "PM25_roll6"] = pm.rolling(window=6, min_periods=1).mean().values
        df.loc[idx, "PM25_roll12"] = pm.rolling(window=12, min_periods=1).mean().values
        df.loc[idx, "PM25_roll24"] = pm.rolling(window=24, min_periods=1).mean().values

    # fill remaining NaNs with sensible defaults
    for c in lag_cols + roll_cols:
        if c in df.columns:
            df[c] = df[c].astype(float).fillna(method="ffill").fillna(0)

    return df

In [5]:
# ----------------- Loading & preparing training data -----------------
df = load_data(DATA_PATH)
print("Raw data shape:", df.shape)
df = df.sort_values("Datetime").reset_index(drop=True)

# Ensure station encoding exists or create one
if "StationId_enc" not in df.columns:
    if "StationId" not in df.columns:
        raise ValueError("Training data must contain StationId or StationId_enc.")
    # create encoding
    df["StationId_enc"] = pd.factorize(df["StationId"])[0]

# ------- Ensure datetime & interactions -------
df = add_datetime_features(df)
df = add_interactions(df)

# ------- Compute lags & rolls -------
df = compute_lags_rolls(df, station_col="StationId", time_col="Datetime")

# ------- Ensure expected features exist (create if missing with fallbacks) -------
for feat in EXPECTED_FEATURES:
    if feat not in df.columns:
        # create safe fallback numeric column
        df[feat] = 0.0
        print(f"WARNING: Feature {feat} missing in source and created fallback zeros.")

# Drop rows where target is missing
df = df[~df[TARGET_COL].isna()].copy()
df = df.sort_values("Datetime").reset_index(drop=True)
print("Prepared data shape:", df.shape)

# ----------------- Feature list & X,y -----------------
feature_list = [f for f in EXPECTED_FEATURES if f in df.columns]
print(f"Using {len(feature_list)} features for training.")

X = df[feature_list].astype(float)
y = df[TARGET_COL].astype(float)



# ----------------- TimeSeriesSplit setup -----------------
tscv = TimeSeriesSplit(n_splits=TS_SPLITS)

# utility to evaluate predictions
def evaluate(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred) ** 0.5  # manual sqrt
    r2 = r2_score(y_true, y_pred)
    return {"MAE": mae, "RMSE": rmse, "R2": r2}



Raw data shape: (1449833, 50)
Prepared data shape: (1449833, 51)
Using 46 features for training.


In [4]:

# ----------------- Baseline CatBoost (quick) -----------------
print("\nTraining baseline CatBoost (quick)...")
baseline_params = {
    "iterations": 1000,
    "learning_rate": 0.1,
    "depth": 6,
    "random_seed": RANDOM_SEED,
    "loss_function": "MAE",
    "verbose": 0,
    "early_stopping_rounds": 50
}

cv_maes = []
for train_idx, test_idx in tscv.split(X):
    X_tr, X_val = X.iloc[train_idx], X.iloc[test_idx]
    y_tr, y_val = y.iloc[train_idx], y.iloc[test_idx]

    model = CatBoostRegressor(**baseline_params)
    model.fit(X_tr, y_tr, eval_set=(X_val, y_val), verbose=False, use_best_model=True)
    pred = model.predict(X_val)
    cv_maes.append(mean_absolute_error(y_val, pred))

print("Baseline CV MAE (mean):", np.mean(cv_maes))

# ----------------- Optuna hyperparameter tuning -----------------
print("\nStarting Optuna tuning... (this may take time)")

def objective(trial):
    params = {
        "iterations": trial.suggest_int("iterations", 500, 3000, step=100),
        "depth": trial.suggest_int("depth", 4, 10),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 10.0, log=True),
        "random_strength": trial.suggest_float("random_strength", 0.0, 20.0),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 1.5),
    
        "loss_function": "MAE",
        "random_seed": RANDOM_SEED,
        "verbose": False,
    
        # GPU HERE
        "task_type": "GPU",
        "devices": "0"
    }

    # time-series CV -> compute mean MAE across splits
    maes = []
    for train_idx, test_idx in tscv.split(X):
        X_tr, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_tr, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = CatBoostRegressor(**params)
        model.fit(X_tr, y_tr, eval_set=(X_val, y_val),
                  early_stopping_rounds=EARLY_STOPPING_ROUNDS, verbose=False, use_best_model=True)
        pred = model.predict(X_val)
        maes.append(mean_absolute_error(y_val, pred))

    return float(np.mean(maes))

study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED))
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

print("Best trial params:")
print(study.best_trial.params)
best_params = study.best_trial.params

Raw data shape: (1449833, 50)
Prepared data shape: (1449833, 51)
Using 46 features for training.

Training baseline CatBoost (quick)...


[I 2025-11-24 18:51:47,266] A new study created in memory with name: no-name-08baeb72-72e2-4d0f-a4b4-a1eab8de159e


Baseline CV MAE (mean): 24.74097526352017

Starting Optuna tuning... (this may take time)


  0%|          | 0/50 [00:00<?, ?it/s]

Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 18:55:00,071] Trial 0 finished with value: 74.37166837468942 and parameters: {'iterations': 1400, 'depth': 10, 'learning_rate': 0.06504856968981275, 'l2_leaf_reg': 0.24810409748678125, 'random_strength': 3.1203728088487304, 'bagging_temperature': 0.23399178050430397}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 18:56:34,410] Trial 1 finished with value: 96.59508068492605 and parameters: {'iterations': 600, 'depth': 10, 'learning_rate': 0.030834348179355788, 'l2_leaf_reg': 0.679657809075816, 'random_strength': 0.41168988591604894, 'bagging_temperature': 1.4548647782429915}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 18:59:32,553] Trial 2 finished with value: 100.68760526557311 and parameters: {'iterations': 2600, 'depth': 5, 'learning_rate': 0.002820996133514492, 'l2_leaf_reg': 0.00541524411940254, 'random_strength': 6.0848448591907545, 'bagging_temperature': 0.7871346474483567}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:01:36,979] Trial 3 finished with value: 86.20152216918929 and parameters: {'iterations': 1600, 'depth': 6, 'learning_rate': 0.032781876533976156, 'l2_leaf_reg': 0.003613894271216527, 'random_strength': 5.842892970704363, 'bagging_temperature': 0.5495427649405376}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:04:51,415] Trial 4 finished with value: 101.39777950897658 and parameters: {'iterations': 1600, 'depth': 9, 'learning_rate': 0.003123317753376431, 'l2_leaf_reg': 0.11400863701127326, 'random_strength': 11.84829137724085, 'bagging_temperature': 0.06967561907999659}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:07:53,803] Trial 5 finished with value: 102.19997120379503 and parameters: {'iterations': 2000, 'depth': 5, 'learning_rate': 0.0014492412389916862, 'l2_leaf_reg': 6.245139574743075, 'random_strength': 19.312640661491187, 'bagging_temperature': 1.2125960221746916}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:11:59,689] Trial 6 finished with value: 85.2151318743262 and parameters: {'iterations': 1200, 'depth': 4, 'learning_rate': 0.04953682563497157, 'l2_leaf_reg': 0.057624872164786026, 'random_strength': 2.4407646968955765, 'bagging_temperature': 0.7427653651669053}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:15:37,525] Trial 7 finished with value: 102.36710622065625 and parameters: {'iterations': 500, 'depth': 10, 'learning_rate': 0.004375517173207359, 'l2_leaf_reg': 0.4467752817973907, 'random_strength': 6.2342215217882195, 'bagging_temperature': 0.7801020317667162}. Best is trial 0 with value: 74.37166837468942.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:21:04,993] Trial 8 finished with value: 30.966920215140885 and parameters: {'iterations': 1900, 'depth': 5, 'learning_rate': 0.25221951700214285, 'l2_leaf_reg': 1.2604664585649468, 'random_strength': 18.789978831283783, 'bagging_temperature': 1.3422410256414732}. Best is trial 8 with value: 30.966920215140885.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:35:33,959] Trial 9 finished with value: 101.95120327476684 and parameters: {'iterations': 2000, 'depth': 10, 'learning_rate': 0.0016565580440884786, 'l2_leaf_reg': 0.006080390190296602, 'random_strength': 0.9045457782107613, 'bagging_temperature': 0.4879954961448965}. Best is trial 8 with value: 30.966920215140885.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:46:38,368] Trial 10 finished with value: 26.311670222047006 and parameters: {'iterations': 2900, 'depth': 7, 'learning_rate': 0.2521511680920944, 'l2_leaf_reg': 5.997863556602811, 'random_strength': 19.677484322923892, 'bagging_temperature': 1.18176389348056}. Best is trial 10 with value: 26.311670222047006.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 19:57:09,228] Trial 11 finished with value: 25.99469791388404 and parameters: {'iterations': 3000, 'depth': 7, 'learning_rate': 0.27192070434055626, 'l2_leaf_reg': 9.75862618615181, 'random_strength': 19.916037747159553, 'bagging_temperature': 1.1635775569331008}. Best is trial 11 with value: 25.99469791388404.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 20:07:48,092] Trial 12 finished with value: 25.62907698232821 and parameters: {'iterations': 3000, 'depth': 8, 'learning_rate': 0.28991670888825555, 'l2_leaf_reg': 7.613544142056221, 'random_strength': 13.797808466993839, 'bagging_temperature': 1.1099669963174337}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 20:19:49,209] Trial 13 finished with value: 35.643378329544575 and parameters: {'iterations': 3000, 'depth': 8, 'learning_rate': 0.11737007427787913, 'l2_leaf_reg': 9.940058213957167, 'random_strength': 14.074739653725292, 'bagging_temperature': 1.0362440724912654}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 20:30:58,817] Trial 14 finished with value: 91.36749975131947 and parameters: {'iterations': 2500, 'depth': 8, 'learning_rate': 0.013935764853569597, 'l2_leaf_reg': 3.1319414286199625, 'random_strength': 15.3893258514656, 'bagging_temperature': 1.0009115924385412}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 20:39:09,694] Trial 15 finished with value: 41.598604268370245 and parameters: {'iterations': 2400, 'depth': 7, 'learning_rate': 0.1210571697080923, 'l2_leaf_reg': 1.8432076609359598, 'random_strength': 15.997565589787676, 'bagging_temperature': 1.0282569121934013}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 20:44:43,175] Trial 16 finished with value: 92.42294346141145 and parameters: {'iterations': 2700, 'depth': 8, 'learning_rate': 0.011637366568065192, 'l2_leaf_reg': 0.025923322912027078, 'random_strength': 10.419745726544182, 'bagging_temperature': 1.2658779566265734}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 20:48:18,012] Trial 17 finished with value: 42.77481336121916 and parameters: {'iterations': 2300, 'depth': 6, 'learning_rate': 0.12393037713305721, 'l2_leaf_reg': 2.4750561465550756, 'random_strength': 16.96665765051186, 'bagging_temperature': 1.4539047780730767}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 20:54:53,339] Trial 18 finished with value: 25.725244001057824 and parameters: {'iterations': 2800, 'depth': 9, 'learning_rate': 0.2717895569377977, 'l2_leaf_reg': 0.018667114291117604, 'random_strength': 13.410902852097724, 'bagging_temperature': 0.9300925004441754}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:00:37,246] Trial 19 finished with value: 54.201737473430725 and parameters: {'iterations': 2300, 'depth': 9, 'learning_rate': 0.08193300703294562, 'l2_leaf_reg': 0.0010927222371903014, 'random_strength': 12.83234847998753, 'bagging_temperature': 0.8654237608345167}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:14:16,053] Trial 20 finished with value: 95.59623457628331 and parameters: {'iterations': 2800, 'depth': 9, 'learning_rate': 0.007722824967048146, 'l2_leaf_reg': 0.0227406496982038, 'random_strength': 8.7241234207407, 'bagging_temperature': 0.6456781243554162}. Best is trial 12 with value: 25.62907698232821.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:19:46,877] Trial 21 finished with value: 25.537196106843133 and parameters: {'iterations': 3000, 'depth': 8, 'learning_rate': 0.2987646259036188, 'l2_leaf_reg': 0.018949650626008688, 'random_strength': 16.993667922363702, 'bagging_temperature': 0.9398476987366979}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:24:00,013] Trial 22 finished with value: 29.884537315984932 and parameters: {'iterations': 2700, 'depth': 8, 'learning_rate': 0.17257924498457997, 'l2_leaf_reg': 0.026718734153364735, 'random_strength': 13.728822372032052, 'bagging_temperature': 0.9073338706202653}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:32:36,726] Trial 23 finished with value: 27.836867316780904 and parameters: {'iterations': 3000, 'depth': 9, 'learning_rate': 0.17938881054000283, 'l2_leaf_reg': 0.012405319719901522, 'random_strength': 11.165819853107761, 'bagging_temperature': 1.0682432082991105}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:41:21,006] Trial 24 finished with value: 26.524225911053044 and parameters: {'iterations': 2200, 'depth': 8, 'learning_rate': 0.29805956605036477, 'l2_leaf_reg': 0.08090596045831489, 'random_strength': 17.532024649494613, 'bagging_temperature': 0.931532210079434}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:54:48,322] Trial 25 finished with value: 30.52759912034523 and parameters: {'iterations': 2800, 'depth': 9, 'learning_rate': 0.15735858856227097, 'l2_leaf_reg': 0.1815187992933246, 'random_strength': 14.90274267081162, 'bagging_temperature': 0.3601177393106059}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 21:58:53,203] Trial 26 finished with value: 78.18763806867712 and parameters: {'iterations': 1000, 'depth': 7, 'learning_rate': 0.07940503007125457, 'l2_leaf_reg': 0.0020179622064785924, 'random_strength': 8.989759894341695, 'bagging_temperature': 0.6735679566103294}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 22:11:13,649] Trial 27 finished with value: 73.30885162524278 and parameters: {'iterations': 2600, 'depth': 8, 'learning_rate': 0.03742596372811546, 'l2_leaf_reg': 0.010424310534796652, 'random_strength': 17.171968509485694, 'bagging_temperature': 1.3179405098745498}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 22:24:20,746] Trial 28 finished with value: 27.268012439985206 and parameters: {'iterations': 2800, 'depth': 9, 'learning_rate': 0.20436624113638752, 'l2_leaf_reg': 0.0336198003799703, 'random_strength': 12.957104122219524, 'bagging_temperature': 1.0822630066349461}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 22:32:24,764] Trial 29 finished with value: 56.63551477422044 and parameters: {'iterations': 2500, 'depth': 6, 'learning_rate': 0.07259619478035277, 'l2_leaf_reg': 0.20434587069222476, 'random_strength': 12.074285465067891, 'bagging_temperature': 0.8772275473972431}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 22:46:20,731] Trial 30 finished with value: 48.035422306371224 and parameters: {'iterations': 2200, 'depth': 10, 'learning_rate': 0.1036683324915951, 'l2_leaf_reg': 0.013082555863532061, 'random_strength': 16.296544810117705, 'bagging_temperature': 1.126632376473619}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 22:55:26,945] Trial 31 finished with value: 26.20012260733656 and parameters: {'iterations': 3000, 'depth': 7, 'learning_rate': 0.24959860124445385, 'l2_leaf_reg': 0.9109031241719803, 'random_strength': 17.975472945636323, 'bagging_temperature': 1.1613433765601644}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:05:06,036] Trial 32 finished with value: 29.44612308078223 and parameters: {'iterations': 3000, 'depth': 7, 'learning_rate': 0.16423557062651067, 'l2_leaf_reg': 9.971995733781661, 'random_strength': 14.362603922286064, 'bagging_temperature': 0.951898566989475}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:11:51,024] Trial 33 finished with value: 25.72759914873418 and parameters: {'iterations': 2700, 'depth': 8, 'learning_rate': 0.2996918676349386, 'l2_leaf_reg': 0.3701148630311426, 'random_strength': 18.38786472764159, 'bagging_temperature': 1.3889076386965278}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:17:06,328] Trial 34 finished with value: 85.86960852549022 and parameters: {'iterations': 2600, 'depth': 8, 'learning_rate': 0.020108128975951173, 'l2_leaf_reg': 0.4107305731365445, 'random_strength': 18.38148148448989, 'bagging_temperature': 1.4864664956802305}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:19:07,350] Trial 35 finished with value: 89.11095682397311 and parameters: {'iterations': 800, 'depth': 9, 'learning_rate': 0.051774551767701155, 'l2_leaf_reg': 0.05430278926785755, 'random_strength': 15.537654979362998, 'bagging_temperature': 1.3426282343828482}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:24:44,317] Trial 36 finished with value: 28.019050479021036 and parameters: {'iterations': 2800, 'depth': 8, 'learning_rate': 0.19316297991997247, 'l2_leaf_reg': 0.11890965500754659, 'random_strength': 16.594649577823077, 'bagging_temperature': 1.2465060132960977}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:28:10,728] Trial 37 finished with value: 31.534352719354075 and parameters: {'iterations': 1400, 'depth': 9, 'learning_rate': 0.29717706211434775, 'l2_leaf_reg': 0.004429058397065861, 'random_strength': 13.358055595847414, 'bagging_temperature': 1.4010391815843208}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:35:58,857] Trial 38 finished with value: 34.011398185630966 and parameters: {'iterations': 2700, 'depth': 10, 'learning_rate': 0.1364253374771912, 'l2_leaf_reg': 0.3275989459557172, 'random_strength': 14.95597139830358, 'bagging_temperature': 0.0793675314042086}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:41:15,427] Trial 39 finished with value: 62.28841963101188 and parameters: {'iterations': 2500, 'depth': 8, 'learning_rate': 0.05837228016780493, 'l2_leaf_reg': 0.5702370509701077, 'random_strength': 7.94558472834213, 'bagging_temperature': 0.8312126796043242}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:45:55,355] Trial 40 finished with value: 44.081011407355994 and parameters: {'iterations': 2900, 'depth': 6, 'learning_rate': 0.09426102393842131, 'l2_leaf_reg': 0.04720382640205022, 'random_strength': 11.594819287138204, 'bagging_temperature': 0.7253707195838844}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:49:42,129] Trial 41 finished with value: 26.678847403732323 and parameters: {'iterations': 2900, 'depth': 7, 'learning_rate': 0.23161660478316254, 'l2_leaf_reg': 3.87495738867573, 'random_strength': 19.364366092734087, 'bagging_temperature': 1.1318902747094273}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:53:38,860] Trial 42 finished with value: 25.790708534637975 and parameters: {'iterations': 3000, 'depth': 7, 'learning_rate': 0.2970773146197139, 'l2_leaf_reg': 1.601608975022151, 'random_strength': 18.515696748139124, 'bagging_temperature': 1.2834480034689093}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:57:16,591] Trial 43 finished with value: 27.86510100346692 and parameters: {'iterations': 2700, 'depth': 7, 'learning_rate': 0.2102034639912888, 'l2_leaf_reg': 1.514945605333811, 'random_strength': 18.456975443416948, 'bagging_temperature': 1.380263476372444}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-24 23:59:49,863] Trial 44 finished with value: 27.295346743291326 and parameters: {'iterations': 2900, 'depth': 4, 'learning_rate': 0.299848290772007, 'l2_leaf_reg': 0.7440383712221853, 'random_strength': 17.556324154726656, 'bagging_temperature': 1.2751645677887542}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-25 00:02:34,023] Trial 45 finished with value: 46.7249863568886 and parameters: {'iterations': 1700, 'depth': 8, 'learning_rate': 0.14297558100577815, 'l2_leaf_reg': 5.030141505576981, 'random_strength': 19.041757919777346, 'bagging_temperature': 0.9807800785560424}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-25 00:07:02,056] Trial 46 finished with value: 26.673348907158413 and parameters: {'iterations': 2900, 'depth': 8, 'learning_rate': 0.2204869499767029, 'l2_leaf_reg': 0.008848965534912299, 'random_strength': 4.326662590391881, 'bagging_temperature': 1.1970701077379313}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-25 00:12:06,109] Trial 47 finished with value: 81.89344253737062 and parameters: {'iterations': 2600, 'depth': 9, 'learning_rate': 0.024971908125622656, 'l2_leaf_reg': 1.145100033615237, 'random_strength': 16.014507845895665, 'bagging_temperature': 1.4015622512205492}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-25 00:14:33,675] Trial 48 finished with value: 101.7333068035723 and parameters: {'iterations': 1900, 'depth': 6, 'learning_rate': 0.0022020489245008876, 'l2_leaf_reg': 0.2840764145507083, 'random_strength': 14.329931921582906, 'bagging_temperature': 1.0921837728694057}. Best is trial 21 with value: 25.537196106843133.


Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU
Default metric period is 5 because MAE is/are not implemented for GPU


[I 2025-11-25 00:18:03,512] Trial 49 finished with value: 99.07945127056459 and parameters: {'iterations': 2400, 'depth': 7, 'learning_rate': 0.004972911883588379, 'l2_leaf_reg': 2.491572072806746, 'random_strength': 12.446766509544567, 'bagging_temperature': 0.5692463096472594}. Best is trial 21 with value: 25.537196106843133.
Best trial params:
{'iterations': 3000, 'depth': 8, 'learning_rate': 0.2987646259036188, 'l2_leaf_reg': 0.018949650626008688, 'random_strength': 16.993667922363702, 'bagging_temperature': 0.9398476987366979}

Training final model on full dataset...


Default metric period is 5 because MAE is/are not implemented for GPU


0:	learn: 103.1694105	total: 33.1ms	remaining: 1m 39s
100:	learn: 92.9906493	total: 3.23s	remaining: 1m 32s
200:	learn: 83.8974972	total: 6.39s	remaining: 1m 29s
300:	learn: 76.0529192	total: 9.56s	remaining: 1m 25s
400:	learn: 69.4540351	total: 12.6s	remaining: 1m 21s
500:	learn: 63.7415371	total: 15.9s	remaining: 1m 19s
600:	learn: 58.7515997	total: 19.1s	remaining: 1m 16s
700:	learn: 54.3575819	total: 22.2s	remaining: 1m 12s
800:	learn: 50.4233743	total: 25.2s	remaining: 1m 9s
900:	learn: 46.8417314	total: 28.4s	remaining: 1m 6s
1000:	learn: 43.5383220	total: 31.4s	remaining: 1m 2s
1100:	learn: 40.4385333	total: 34.3s	remaining: 59.2s
1200:	learn: 37.5931904	total: 37.2s	remaining: 55.7s
1300:	learn: 35.2972763	total: 40.2s	remaining: 52.5s
1400:	learn: 33.4677773	total: 43s	remaining: 49.1s
1500:	learn: 31.9973293	total: 45.9s	remaining: 45.9s
1600:	learn: 30.8206724	total: 48.9s	remaining: 42.7s
1700:	learn: 29.8608240	total: 52s	remaining: 39.7s
1800:	learn: 29.0783890	total: 55s

TypeError: got an unexpected keyword argument 'squared'

In [6]:
best_params = {'iterations': 3000, 'depth': 8, 'learning_rate': 0.2987646259036188, 'l2_leaf_reg': 0.018949650626008688, 'random_strength': 16.993667922363702, 'bagging_temperature': 0.9398476987366979}


# ----------------- Train final model on full data with best params -----------------
final_params = dict(best_params)
# ensure a few params are set
final_params.update({
    "loss_function": "MAE",
    "random_seed": RANDOM_SEED,
    "verbose": VERBOSE,
    "task_type": "GPU",   # GPU HERE
    "devices": "0"
})

print("\nTraining final model on full dataset...")
final_model = CatBoostRegressor(**final_params)
final_model.fit(X, y, verbose=VERBOSE)

# ----------------- Evaluation on hold-out: last fold as pseudo-test -----------------
# We'll evaluate final model on the last split's test set (most recent portion)
splits = list(tscv.split(X))
train_idx, test_idx = splits[-1]
X_tr, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_tr, y_test = y.iloc[train_idx], y.iloc[test_idx]

pred_test = final_model.predict(X_test)
metrics = evaluate(y_test, pred_test)
print("\nEvaluation on most recent fold (pseudo-test):")
print(metrics)

# ----------------- Per-station performance summary -----------------
print("\nPer-station MAE summary (on full dataset predictions)...")
df_preds = df.copy()
df_preds["pred"] = final_model.predict(X)
per_station = df_preds.groupby("StationId").apply(lambda g: mean_absolute_error(g[TARGET_COL], g["pred"]))
per_station = per_station.sort_values()
print(per_station.head(10))
print("...")

# ----------------- Feature importance -----------------
# try:
#     fi = final_model.get_feature_importance(prettified=True)
#     print("\nTop features (CatBoost importance):")
#     print(fi.head(20))
#     # save plot
#     fi_df = pd.DataFrame({
#         "feature": final_model.feature_names_,
#         "importance": final_model.get_feature_importance()
#     })
#     fi_df = fi_df.sort_values("importance", ascending=False).head(30)
#     plt.figure(figsize=(8,10))
#     plt.barh(fi_df["feature"][::-1], fi_df["importance"][::-1])
#     plt.title("Feature importance (top 30)")
#     plt.tight_layout()
#     plt.savefig("feature_importance_top30.png", dpi=150)
#     print("Feature importance plot saved -> feature_importance_top30.png")
# except Exception as e:
#     print("Could not compute feature importance:", e)


# ----------------- Save model and features -----------------
print("\nSaving model and feature list...")
joblib.dump(final_model, MODEL_SAVE)
joblib.dump(feature_list, FEATURES_SAVE)
print(f"Model saved to: {MODEL_SAVE}")
print(f"Feature list saved to: {FEATURES_SAVE}")

# ----------------- Quick summary -----------------
print("\nTraining complete.")
print("Baseline CV MAE (mean):", np.mean(cv_maes))
print("Final evaluation MAE:", metrics["MAE"])



Training final model on full dataset...


Default metric period is 5 because MAE is/are not implemented for GPU


0:	learn: 103.1693995	total: 220ms	remaining: 11m
100:	learn: 92.9906493	total: 3.64s	remaining: 1m 44s
200:	learn: 83.8974972	total: 7.11s	remaining: 1m 39s
300:	learn: 76.0529192	total: 11.2s	remaining: 1m 40s
400:	learn: 69.4540351	total: 15.4s	remaining: 1m 39s
500:	learn: 63.7415371	total: 19.2s	remaining: 1m 35s
600:	learn: 58.7515997	total: 23.1s	remaining: 1m 32s
700:	learn: 54.3575819	total: 27.3s	remaining: 1m 29s
800:	learn: 50.4233743	total: 31.2s	remaining: 1m 25s
900:	learn: 46.8417259	total: 34.9s	remaining: 1m 21s
1000:	learn: 43.5383220	total: 38.5s	remaining: 1m 16s
1100:	learn: 40.4385333	total: 42.4s	remaining: 1m 13s
1200:	learn: 37.5931904	total: 46s	remaining: 1m 8s
1300:	learn: 35.2972791	total: 49.7s	remaining: 1m 4s
1400:	learn: 33.4677773	total: 53.4s	remaining: 1m
1500:	learn: 31.9973293	total: 56.9s	remaining: 56.8s
1600:	learn: 30.8206724	total: 1m	remaining: 52.8s
1700:	learn: 29.8608240	total: 1m 4s	remaining: 48.9s
1800:	learn: 29.0783890	total: 1m 7s	r

NameError: name 'cv_maes' is not defined