In [1]:
!pip install polars scikeras pandas numpy matplotlib scikit-learn xgboost lightgbm catboost optuna tensorflow torch torchvision torchaudio arch backtesting empyrical



In [2]:
# ╔═════════════════════════════════════════════════════════════════╗
# ║  0.  Imports: data, modelling, hyper‑opt, back‑testing, plots  ║
# ╚═════════════════════════════════════════════════════════════════╝
import polars as pl                      # fast, lazy frame ops
import numpy as np
import pandas as pd                      # many ML libs expect pandas
from datetime import datetime

# Scikit‑learn / classical ML
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.preprocessing  import StandardScaler
from sklearn.pipeline       import Pipeline
from sklearn.metrics        import mean_squared_error, mean_absolute_error, r2_score

from sklearn.ensemble       import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.linear_model   import LinearRegression, BayesianRidge, Ridge, SGDRegressor
from sklearn.svm            import SVR

# Boosted trees
import xgboost              as xgb
import lightgbm             as lgb
from catboost import CatBoostRegressor

# Deep learning
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import torch
import torch.nn as nn

# Probabilistic models
from arch import arch_model             # GARCH, EGARCH, GJR‑GARCH …

# Hyper‑parameter optimisation
import optuna

# Back‑testing engine
from backtesting import Backtest, Strategy

# Performance / risk metrics
import empyrical as emp                 # sharpe_ratio, drawdown, etc.

# Plots
import matplotlib.pyplot as plt
plt.style.use("ggplot")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import csv
import polars as pl

PATH = "/Users/jadenfix/Downloads/btc_data.csv"

# ── 1. Read just the header with Python’s csv.reader ─────────────────────────
with open(PATH, newline="") as f:
    header = next(csv.reader(f))
# header is now a list like ["time","low","high",…,"score"]

# ── 2. Build a schema that forces every column to Utf8 ────────────────────────
utf_schema = {col: pl.Utf8 for col in header}

# ── 3. Load the CSV under that all-string schema ─────────────────────────────
df_raw = pl.read_csv(
    PATH,
    dtypes=utf_schema,
    infer_schema_length=None    # no need for Polars to guess anything
)

# ── 4. Clean numeric columns: strip out “,” and cast to Float64 ───────────────
numeric_cols = [c for c in header if c != "time"]
df_clean = df_raw.with_columns([
    pl.col(c)
      .str.replace_all(",", "")
      .cast(pl.Float64)
      .alias(c)
    for c in numeric_cols
])

# ── 5. Parse your date & dedupe/sort ─────────────────────────────────────────
df_pl = (
    df_clean
    .with_columns(
        pl.col("time")
          .str.strptime(pl.Date, "%m/%d/%y")
          .alias("date")
    )
    .unique(subset=["date"])
    .sort("date")
)

print(df_pl.head(), "\nTotal rows:", df_pl.height)

shape: (5, 26)
┌─────────┬──────────┬──────────┬──────────┬───┬──────────────┬──────────────┬───────┬────────────┐
│ time    ┆ low      ┆ high     ┆ open     ┆ … ┆ Volume_MA_20 ┆ OBV          ┆ score ┆ date       │
│ ---     ┆ ---      ┆ ---      ┆ ---      ┆   ┆ ---          ┆ ---          ┆ ---   ┆ ---        │
│ str     ┆ f64      ┆ f64      ┆ f64      ┆   ┆ f64          ┆ f64          ┆ f64   ┆ date       │
╞═════════╪══════════╪══════════╪══════════╪═══╪══════════════╪══════════════╪═══════╪════════════╡
│ 5/8/24  ┆ 60851.04 ┆ 63013.05 ┆ 62315.75 ┆ … ┆ 9502.678697  ┆ -158170.6207 ┆ 0.0   ┆ 2024-05-08 │
│ 5/9/24  ┆ 60601.6  ┆ 63424.14 ┆ 61169.53 ┆ … ┆ 9727.153941  ┆ -150684.1947 ┆ 0.0   ┆ 2024-05-09 │
│ 5/10/24 ┆ 60150.0  ┆ 63470.0  ┆ 63073.55 ┆ … ┆ 9757.652202  ┆ -159044.2501 ┆ 0.0   ┆ 2024-05-10 │
│ 5/11/24 ┆ 60450.13 ┆ 61482.0  ┆ 60787.99 ┆ … ┆ 9774.158897  ┆ -147533.1202 ┆ 0.0   ┆ 2024-05-11 │
│ 5/12/24 ┆ 60576.05 ┆ 61843.45 ┆ 60814.64 ┆ … ┆ 10191.79843  ┆ -145195.0521 ┆ 0.0   

  df_raw = pl.read_csv(


In [4]:
# ╔══════════════════════════════════════╗
# ║  2. Split into model & back‑test sets ║
# ╚══════════════════════════════════════╝
start_model = datetime(2024, 5, 8)
end_model   = datetime(2025, 4, 7)
start_bt    = datetime(2025, 4, 8)
end_bt      = datetime(2025, 5, 7)

model_pl    = df_pl.filter(pl.col("date").is_between(start_model, end_model))
backtest_pl = df_pl.filter(pl.col("date").is_between(start_bt,   end_bt))

# Keep only the columns we’ll feed to ML (everything except 'change' for X)
FEATURES = [c for c in model_pl.columns if c not in ("date", "change", "pct_change")]
TARGET   = "change"         # already present in your CSV

# Convert to pandas for libraries that want it
model_df    = model_pl.to_pandas().set_index("date")
backtest_df = backtest_pl.to_pandas().set_index("date")

print(model_df.head())
print(backtest_df.head())

               time       low      high      open     close        volume  \
date                                                                        
2024-05-08   5/8/24  60851.04  63013.05  62315.75  61169.53   7486.425968   
2024-05-09   5/9/24  60601.60  63424.14  61169.53  63073.57   8360.055382   
2024-05-10  5/10/24  60150.00  63470.00  63073.55  60787.47  11511.129910   
2024-05-11  5/11/24  60450.13  61482.00  60787.99  60814.63   2338.068108   
2024-05-12  5/12/24  60576.05  61843.45  60814.64  61453.02   2694.975779   

             change  pct_change      SMA_20      SMA_50  ...         MACD  \
date                                                     ...                
2024-05-08 -1146.22   -1.839374  65890.2210  66417.9682  ... -1646.858068   
2024-05-09  1904.04    3.112726  66247.8435  66426.8854  ... -1542.743776   
2024-05-10 -2286.08   -3.624467  66472.6375  66371.6810  ... -1581.282060   
2024-05-11    26.64    0.043824  66850.1930  66373.6450  ... -1374.703914  

In [5]:
# ╔════════════════════════════════════╗
# ║  3. Train‑test split (80 / 20)     ║
# ╚════════════════════════════════════╝
split_idx       = int(len(model_df) * 0.8)
train_df        = model_df.iloc[:split_idx]
valid_df        = model_df.iloc[split_idx:]

X_train, y_train = train_df[FEATURES], train_df[TARGET]
X_valid, y_valid = valid_df[FEATURES], valid_df[TARGET]

In [6]:
# ╔═══════════════════════════════════════════════════╗
# ║  4.  Helper: metric bundle & Optuna objective     ║
# ╚═══════════════════════════════════════════════════╝
def regression_metrics(y_true, y_pred):
    mse  = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae  = mean_absolute_error(y_true, y_pred)
    r2   = r2_score(y_true, y_pred)
    return dict(mse=mse, rmse=rmse, mae=mae, r2=r2)

def objective(trial, ModelClass, X, y):
    # Define a small but meaningful search space per model --------------------
    if ModelClass is RandomForestRegressor:
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 600, step=100),
            "max_depth":    trial.suggest_int("max_depth",    4,   12),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        }
    elif ModelClass is xgb.XGBRegressor:
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 300, 1200, step=300),
            "eta":          trial.suggest_float("eta", 0.01, 0.3, log=True),
            "max_depth":    trial.suggest_int("max_depth", 3, 10),
            "subsample":    trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "objective":    "reg:squarederror",
        }
    # … add other model parameter spaces as elif blocks …

    # Pipeline with scaling where needed --------------------------------------
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model",  ModelClass(**params))
    ])

    tscv  = TimeSeriesSplit(n_splits=5)
    rmse_scores = []
    for train_idx, test_idx in tscv.split(X):
        X_tr, X_te = X.iloc[train_idx], X.iloc[test_idx]
        y_tr, y_te = y.iloc[train_idx], y.iloc[test_idx]
        pipe.fit(X_tr, y_tr)
        preds = pipe.predict(X_te)
        rmse_scores.append( np.sqrt(mean_squared_error(y_te, preds)) )
    return np.mean(rmse_scores)

In [7]:
# Helper imports (only once)
import numpy as np, optuna, xgboost as xgb, lightgbm as lgb
from sklearn.pipeline      import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics       import mean_squared_error
from sklearn.ensemble      import RandomForestRegressor
from sklearn.svm           import SVR
from sklearn.linear_model  import LinearRegression, BayesianRidge
from catboost              import CatBoostRegressor
from arch                  import arch_model
from scikeras.wrappers     import KerasRegressor
from sklearn.base          import BaseEstimator, RegressorMixin, clone
from tensorflow.keras      import layers, models

# ---------- data slices you already built ----------
X_train_n = X_train.select_dtypes(np.number)
X_valid_n = X_valid.select_dtypes(np.number)

MODELS, METRICS = {}, {}      # shared dicts

# ---------- metrics & CV helpers ----------
def reg_metrics(y, p):
    mse  = mean_squared_error(y, p)
    return dict(mse=mse, rmse=np.sqrt(mse))

def safe_cv_score(estimator_cls, params, X, y, splits=4):
    """TimeSeries CV with try/except: failed fit ⇒ large loss."""
    tscv = TimeSeriesSplit(splits)
    rmse = []
    for tr, te in tscv.split(X):
        pipe = Pipeline([("sc", StandardScaler()),
                         ("m", estimator_cls(**params))])
        try:
            pipe.fit(X.iloc[tr], y.iloc[tr])
            pred = pipe.predict(X.iloc[te])
            rmse.append(np.sqrt(mean_squared_error(y.iloc[te], pred)))
        except Exception:
            return 1e12          # punish invalid trial
    return np.mean(rmse)

# ---------- small Keras builders ----------
def build_mlp(n, hid=64, layers_n=2):
    m = models.Sequential([layers.Input((n,))])
    for _ in range(layers_n): m.add(layers.Dense(hid, "relu"))
    m.add(layers.Dense(1)); m.compile("adam", "mse"); return m

def build_lstm(n, hid=32):
    i = layers.Input((n,)); x = layers.Reshape((n,1))(i)
    x = layers.LSTM(hid)(x); o = layers.Dense(1)(x)
    m = models.Model(i, o); m.compile("adam", "mse"); return m

def build_trans(n, heads=4, d=32):
    i = layers.Input((n,1))
    x = layers.MultiHeadAttention(num_heads=heads, key_dim=d)(i,i)
    x = layers.GlobalAveragePooling1D()(x); o = layers.Dense(1)(x)
    m = models.Model(i, o); m.compile("adam", "mse"); return m

# ---------- GARCH wrapper ----------
class GARCHReg(BaseEstimator, RegressorMixin):
    def __init__(self, p=1, q=1): self.p,self.q=p,q
    def fit(self, X, y):
        self.mod_ = arch_model(y, p=self.p, q=self.q, mean="zero").fit(disp="off")
        return self
    def predict(self, X):
        f = self.mod_.forecast(horizon=1, reindex=False).mean.iloc[-1,0]
        return np.repeat(f, len(X))

In [8]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  Classical models 1‑5: fit on train_df ➜ predict on valid_df         ║
# ╠══════════════════════════════════════════════════════════════════════╣
# ║  1) ARIMA            2) SARIMA          3) ARIMAX                    ║
# ║  4) VAR              5) GARCH                                         ║
# ╚══════════════════════════════════════════════════════════════════════╝

from statsmodels.tsa.arima.model   import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.api           import VAR
from arch                          import arch_model
import pandas as pd, numpy as np

# -----------------------------------------------------------------------
# Utility: quick metric capture & storage in shared dicts
# -----------------------------------------------------------------------
def _store(name, y_true, y_pred, model):
    MODELS[name]  = model
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<6} →", {k: f"{v:.4f}" for k, v in METRICS[name].items()})

# -----------------------------------------------------------------------
# 1) ARIMA  (non‑seasonal)  — univariate
# -----------------------------------------------------------------------
arima_mod = ARIMA(train_df[TARGET], order=(5,1,2)).fit()
arima_pred = arima_mod.forecast(len(valid_df))
_store("arima", y_valid, arima_pred, arima_mod)

# -----------------------------------------------------------------------
# 2) SARIMA (seasonal ARIMA) — here: weekly seasonality, tweak as needed
# -----------------------------------------------------------------------
sarima_mod = SARIMAX(
    train_df[TARGET],
    order=(3,1,1),
    seasonal_order=(1,0,1,7)     # (P,D,Q,s)  – s=7 for daily data w/ weekly season
).fit(disp=False)
sarima_pred = sarima_mod.forecast(len(valid_df))
_store("sarima", y_valid, sarima_pred, sarima_mod)

# -----------------------------------------------------------------------
# 3) ARIMAX  (ARIMA + exogenous regressors) — uses your numeric FEATURES
# -----------------------------------------------------------------------
exog_train, exog_valid = X_train.select_dtypes(np.number), X_valid.select_dtypes(np.number)

arimax_mod = SARIMAX(
    endog=train_df[TARGET],
    exog =exog_train,
    order=(4,1,1)
).fit(disp=False)

arimax_pred = arimax_mod.forecast(steps=len(valid_df), exog=exog_valid)
_store("arimax", y_valid, arimax_pred, arimax_mod)

# -----------------------------------------------------------------------
# 4) VAR  (Vector AutoRegression)  — multivariate price/volume/RVI example
#     -> first‑diff to improve stationarity, then cum‑sum back
# -----------------------------------------------------------------------
var_cols   = ['close', 'volume', 'RSI']              # choose any 2+ numeric columns
var_train  = train_df[var_cols].diff().dropna()
var_model  = VAR(var_train).fit(maxlags=5)

# Forecast differences, then invert to price‑level prediction for 'close'
var_fore   = var_model.forecast(var_train.values[-var_model.k_ar:], steps=len(valid_df))
var_pred   = train_df['close'].iloc[-1] + np.cumsum(var_fore[:,0])
_store("var", valid_df['close'], var_pred, var_model)

# -----------------------------------------------------------------------
# 5) GARCH(1,1) — walk‑forward forecasts, one value per valid row
# -----------------------------------------------------------------------
train_ret  = train_df['close'].pct_change().dropna() * 100          # %
valid_ret  = valid_df['close'].pct_change().fillna(0) * 100         # len == len(valid_df)

hist_ret   = train_ret.copy()
garch_preds = []

for realised in valid_ret:
    # fit on all history so far, forecast next‑step mean return
    gmod  = arch_model(hist_ret, p=1, q=1, mean="zero").fit(disp="off")
    fore  = gmod.forecast(horizon=1).mean.iloc[-1, 0]
    garch_preds.append(fore)

    # append the *actual* realised return for the next iteration
    hist_ret = pd.concat([hist_ret, pd.Series([realised])], ignore_index=True)

garch_pred = np.array(garch_preds)            #  len == len(valid_df)
_store("garch", y_valid, garch_pred, gmod)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


arima  → {'mse': '6700966.8911', 'rmse': '2588.6226', 'mae': '1873.8580', 'r2': '-0.0396'}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


sarima → {'mse': '6727057.8101', 'rmse': '2593.6572', 'mae': '1872.5656', 'r2': '-0.0436'}
arimax → {'mse': '0.0000', 'rmse': '0.0007', 'mae': '0.0004', 'r2': '1.0000'}
var    → {'mse': '522236654.2831', 'rmse': '22852.4978', 'mae': '20820.6272', 'r2': '-10.8683'}


  self._init_dates(dates, freq)


garch  → {'mse': '6598262.7590', 'rmse': '2568.7084', 'mae': '1838.2215', 'r2': '-0.0236'}


In [9]:
# ╔═════════  Model — RandomForest  ═════════╗
from sklearn.ensemble import RandomForestRegressor as ESTIMATOR

def search_space(trial):
    return {
        "n_estimators": trial.suggest_int("n_est", 200, 600, 100),
        "max_depth"   : trial.suggest_int("max_d", 4, 12),
    }

study = optuna.create_study(direction="minimize")
study.optimize(lambda t: safe_cv_score(ESTIMATOR, search_space(t),
                                       X_train_n, y_train),
               n_trials=15, show_progress_bar=False)
best = search_space(study.best_trial)

rf_model = Pipeline([("sc", StandardScaler()),
                     ("m", ESTIMATOR(**best))]).fit(X_train_n, y_train)

MODELS["rf"]  = rf_model
METRICS["rf"] = reg_metrics(y_valid, rf_model.predict(X_valid_n))
print("✓ RF", METRICS["rf"])

[I 2025-05-27 23:51:12,954] A new study created in memory with name: no-name-3403bb90-08f9-4481-ab49-dda7d4a128b2
  "n_estimators": trial.suggest_int("n_est", 200, 600, 100),
[I 2025-05-27 23:51:14,358] Trial 0 finished with value: 1870.1490579240526 and parameters: {'n_est': 400, 'max_d': 5}. Best is trial 0 with value: 1870.1490579240526.
  "n_estimators": trial.suggest_int("n_est", 200, 600, 100),
[I 2025-05-27 23:51:16,216] Trial 1 finished with value: 1881.269142926314 and parameters: {'n_est': 400, 'max_d': 10}. Best is trial 0 with value: 1870.1490579240526.
  "n_estimators": trial.suggest_int("n_est", 200, 600, 100),
[I 2025-05-27 23:51:18,716] Trial 2 finished with value: 1873.5012839252613 and parameters: {'n_est': 600, 'max_d': 8}. Best is trial 0 with value: 1870.1490579240526.
  "n_estimators": trial.suggest_int("n_est", 200, 600, 100),
[I 2025-05-27 23:51:20,097] Trial 3 finished with value: 1867.4379255943927 and parameters: {'n_est': 300, 'max_d': 10}. Best is trial 3 w

✓ RF {'mse': 7000096.098216658, 'rmse': np.float64(2645.7694718581697)}


In [10]:
!pip install tbats orbit
# ╔══════════════════════════════════════════════════════════════╗
# ║  Classical models 6‑10: Holt‑Winters, TBATS, Theta, BSTS,    ║
# ║  State‑Space (Kalman)                                        ║
# ╚══════════════════════════════════════════════════════════════╝

from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.forecasting.theta import ThetaModel
from statsmodels.tsa.statespace.structural import UnobservedComponents
# tbats & orbit may need pip install
# put this at the top of the classical‑models cell
try:
    from tbats import TBATS
except (ImportError, ValueError):          # ← add ValueError here
    TBATS = None
try:
    from orbit.models import BSTS
    from orbit.diagnostics.metrics import smape
except ImportError:
    BSTS = None

import warnings, pandas as pd, numpy as np
warnings.filterwarnings("ignore")

def _store(name, y_true, y_pred, model):
    MODELS[name]  = model
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<6} →", {k: f"{v:.4f}" for k, v in METRICS[name].items()})

# -----------------------------------------------------------------
# 6) Holt‑Winters Exponential Smoothing  (additive trend + season)
# -----------------------------------------------------------------
hw_mod = ExponentialSmoothing(
    train_df[TARGET],
    trend="add", seasonal="add", seasonal_periods=7  # weekly season
).fit()
hw_pred = hw_mod.forecast(len(valid_df))
_store("holt", y_valid, hw_pred, hw_mod)

# -----------------------------------------------------------------
# 7) TBATS   (handles multiple/complex seasonalities)
# -----------------------------------------------------------------
if TBATS is not None:
    tb = TBATS(seasonal_periods=[7, 30])          # weekly & monthly
    tb_mod = tb.fit(train_df[TARGET])
    tb_pred = tb_mod.forecast(steps=len(valid_df))
    _store("tbats", y_valid, tb_pred, tb_mod)
else:
    print("tbats  ⚠️  skipped – install tbats to enable")

# -----------------------------------------------------------------
# 8) Theta Method  (fast, often a tough baseline)
# -----------------------------------------------------------------
theta_mod = ThetaModel(train_df[TARGET], period=7).fit()
theta_pred = theta_mod.forecast(len(valid_df))
_store("theta", y_valid, theta_pred, theta_mod)

# -----------------------------------------------------------------
# 9) Bayesian Structural Time Series (BSTS)  via orbit
# -----------------------------------------------------------------
if BSTS is not None:
    bsts_mod = BSTS(
        response_col=TARGET,
        date_col="date",
        seasonality=7,
        estimator="stan-map",
        seed=42
    )
    bsts_mod.fit(train_df.reset_index())          # orbit needs a DataFrame
    bsts_pred = bsts_mod.predict(valid_df.reset_index())["prediction"]
    _store("bsts", y_valid.reset_index(drop=True), bsts_pred.values, bsts_mod)
else:
    print("bsts   ⚠️  skipped – install orbit to enable")

# -----------------------------------------------------------------
# 10) State‑Space / Kalman Filter  (local level + trend + weekly season)
# -----------------------------------------------------------------
ucm_mod = UnobservedComponents(
    train_df[TARGET],
    level="local level",
    trend=True,
    seasonal=7
).fit()
ucm_pred = ucm_mod.forecast(len(valid_df))
_store("ucm", y_valid, ucm_pred, ucm_mod)

# -----------------------------------------------------------------
# Summary of the five new models
# -----------------------------------------------------------------
pd.DataFrame({k:v for k,v in METRICS.items() if k in ["holt","tbats","theta","bsts","ucm"]}).T\
  .style.format({c:"{:.4f}" for c in ["mse","rmse","mae","r2"]})

holt   → {'mse': '6853035.9145', 'rmse': '2617.8304', 'mae': '1871.9559', 'r2': '-0.0632'}
tbats  ⚠️  skipped – install tbats to enable
theta  → {'mse': '6917639.2026', 'rmse': '2630.1405', 'mae': '1914.1060', 'r2': '-0.0732'}
bsts   ⚠️  skipped – install orbit to enable
ucm    → {'mse': '6816889.9182', 'rmse': '2610.9174', 'mae': '1864.7472', 'r2': '-0.0575'}


Unnamed: 0,mse,rmse,mae,r2
holt,6853035.9145,2617.8304,1871.9559,-0.0632
theta,6917639.2026,2630.1405,1914.106,-0.0732
ucm,6816889.9182,2610.9174,1864.7472,-0.0575


In [11]:
# ╔═════════════════════════════════════════════════════════╗
# ║  Models 11‑15 : HMM, OLS, Ridge, Lasso, Elastic‑Net     ║
# ╚═════════════════════════════════════════════════════════╝

import numpy as np, pandas as pd, warnings
from sklearn.pipeline      import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model  import LinearRegression, RidgeCV, LassoCV, ElasticNetCV

# ------------------- helper to store -----------------------------------------------------------------
def _store(name, y_true, y_pred, model):
    MODELS[name]  = model
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<10} →", {k: f"{v:.4f}" for k, v in METRICS[name].items()})

# ------------------- 11) Hidden‑Markov Model on returns ----------------------------------------------
try:
    from hmmlearn.hmm import GaussianHMM

    # fit on % returns of training close price
    train_ret = train_df['close'].pct_change().dropna().values.reshape(-1, 1) * 100
    hmm_mod   = GaussianHMM(n_components=2, covariance_type="diag", n_iter=200).fit(train_ret)

    # Regime‑means as forecasts: map each hidden state to its mean return
    means = hmm_mod.means_.flatten()
    valid_ret = valid_df['close'].pct_change().fillna(0).values.reshape(-1, 1) * 100
    regimes   = hmm_mod.predict(valid_ret)
    hmm_pred  = means[regimes]                      # shape == len(valid_df)

    _store("hmm", y_valid, hmm_pred, hmm_mod)

except ImportError:
    print("hmm      ⚠️  skipped – install hmmlearn to enable")

# ------------------- feature set (numeric only) ------------------------------------------------------
X_tr = X_train.select_dtypes(np.number)
X_va = X_valid.select_dtypes(np.number)

# ------------------- 12) Ordinary Least Squares ------------------------------------------------------
ols_pipe = Pipeline([("sc", StandardScaler()), ("lr", LinearRegression())])
ols_pipe.fit(X_tr, y_train); ols_pred = ols_pipe.predict(X_va)
_store("ols", y_valid, ols_pred, ols_pipe)

# ------------------- 13) RidgeCV  --------------------------------------------------------------------
ridge_pipe = Pipeline([
    ("sc",  StandardScaler()),
    ("rg",  RidgeCV(alphas=np.logspace(-3, 3, 30)))
])
ridge_pipe.fit(X_tr, y_train); ridge_pred = ridge_pipe.predict(X_va)
_store("ridge", y_valid, ridge_pred, ridge_pipe)

# ------------------- 14) LassoCV ---------------------------------------------------------------------
lasso_pipe = Pipeline([
    ("sc",  StandardScaler()),
    ("ls",  LassoCV(alphas=np.logspace(-3, 1, 40), max_iter=5000))
])
lasso_pipe.fit(X_tr, y_train); lasso_pred = lasso_pipe.predict(X_va)
_store("lasso", y_valid, lasso_pred, lasso_pipe)

# ------------------- 15) Elastic‑NetCV ---------------------------------------------------------------
enet_pipe = Pipeline([
    ("sc",  StandardScaler()),
    ("en",  ElasticNetCV(
                l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.9],
                alphas=np.logspace(-3, 1, 30),
                max_iter=5000))
])
enet_pipe.fit(X_tr, y_train); enet_pred = enet_pipe.predict(X_va)
_store("elastic", y_valid, enet_pred, enet_pipe)

# -------------------  quick summary ------------------------------------------------------------------
(pd.DataFrame({k:v for k,v in METRICS.items() if k in
               ["hmm","ols","ridge","lasso","elastic"]})
   .T.style.format({c:"{:.4f}" for c in ["mse","rmse","mae","r2"]}))

hmm        → {'mse': '6598030.5609', 'rmse': '2568.6632', 'mae': '1838.2115', 'r2': '-0.0236'}
ols        → {'mse': '0.0000', 'rmse': '0.0000', 'mae': '0.0000', 'r2': '1.0000'}
ridge      → {'mse': '7088.1727', 'rmse': '84.1913', 'mae': '46.3310', 'r2': '0.9989'}
lasso      → {'mse': '578552.2286', 'rmse': '760.6262', 'mae': '422.6052', 'r2': '0.9102'}
elastic    → {'mse': '4570186.7964', 'rmse': '2137.7995', 'mae': '1176.2133', 'r2': '0.2910'}


Unnamed: 0,mse,rmse,mae,r2
hmm,6598030.5609,2568.6632,1838.2115,-0.0236
ols,0.0,0.0,0.0,1.0
ridge,7088.1727,84.1913,46.331,0.9989
lasso,578552.2286,760.6262,422.6052,0.9102
elastic,4570186.7964,2137.7995,1176.2133,0.291


In [12]:
# ╔══════════════════════════════════════════════════════╗
# ║  Models 16‑20 : BayesianRidge, SVR, KNN, DTR, RF     ║
# ╚══════════════════════════════════════════════════════╝

from sklearn.pipeline      import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model  import BayesianRidge
from sklearn.svm           import SVR
from sklearn.neighbors     import KNeighborsRegressor
from sklearn.tree          import DecisionTreeRegressor
from sklearn.ensemble      import RandomForestRegressor

# numeric feature slices (already built earlier)
X_tr = X_train.select_dtypes(np.number)
X_va = X_valid.select_dtypes(np.number)

def _store(name, y_true, y_pred, model):
    MODELS[name]  = model
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<10} →", {k: f"{v:.4f}" for k, v in METRICS[name].items()})

# -----------------------------------------------------------------
# 16) Bayesian Ridge
# -----------------------------------------------------------------
bayes_pipe = Pipeline([
    ("sc",  StandardScaler()),
    ("br",  BayesianRidge())
])
bayes_pipe.fit(X_tr, y_train)
_store("bayesridge", y_valid, bayes_pipe.predict(X_va), bayes_pipe)

# -----------------------------------------------------------------
# 17) Support Vector Regression  (RBF kernel)
# -----------------------------------------------------------------
svr_pipe = Pipeline([
    ("sc",  StandardScaler()),
    ("svr", SVR(C=10, gamma="scale"))
])
svr_pipe.fit(X_tr, y_train)
_store("svr", y_valid, svr_pipe.predict(X_va), svr_pipe)

# -----------------------------------------------------------------
# 18) K‑Nearest‑Neighbors Regressor
# -----------------------------------------------------------------
knn_pipe = Pipeline([
    ("sc",  StandardScaler()),
    ("knn", KNeighborsRegressor(n_neighbors=10, weights="distance"))
])
knn_pipe.fit(X_tr, y_train)
_store("knn", y_valid, knn_pipe.predict(X_va), knn_pipe)

# -----------------------------------------------------------------
# 19) Decision Tree Regressor
# -----------------------------------------------------------------
dtr = DecisionTreeRegressor(max_depth=8, min_samples_split=5, random_state=42)
dtr.fit(X_tr, y_train)
_store("dtr", y_valid, dtr.predict(X_va), dtr)

# -----------------------------------------------------------------
# 20) Random Forest Regressor  (re‑fit for completeness)
# -----------------------------------------------------------------
rf = RandomForestRegressor(
        n_estimators=400, max_depth=8, min_samples_split=4,
        n_jobs=-1, random_state=42)
rf.fit(X_tr, y_train)
_store("rf", y_valid, rf.predict(X_va), rf)

# -----------------------------------------------------------------
# Quick summary of 16‑20
# -----------------------------------------------------------------
(pd.DataFrame({k:v for k,v in METRICS.items()
               if k in ["bayesridge","svr","knn","dtr","rf"]})
   .T.style.format({c:"{:.4f}" for c in ["mse","rmse","mae","r2"]}))

bayesridge → {'mse': '265803985.7965', 'rmse': '16303.4961', 'mae': '9485.1596', 'r2': '-40.2359'}
svr        → {'mse': '6628945.6466', 'rmse': '2574.6739', 'mae': '1848.0617', 'r2': '-0.0284'}
knn        → {'mse': '6360875.7272', 'rmse': '2522.0777', 'mae': '1810.4004', 'r2': '0.0132'}
dtr        → {'mse': '7589051.4396', 'rmse': '2754.8233', 'mae': '2128.5349', 'r2': '-0.1773'}
rf         → {'mse': '7056017.0868', 'rmse': '2656.3165', 'mae': '1821.5976', 'r2': '-0.0946'}


Unnamed: 0,mse,rmse,mae,r2
rf,7056017.0868,2656.3165,1821.5976,-0.0946
bayesridge,265803985.7965,16303.4961,9485.1596,-40.2359
svr,6628945.6466,2574.6739,1848.0617,-0.0284
knn,6360875.7272,2522.0777,1810.4004,0.0132
dtr,7589051.4396,2754.8233,2128.5349,-0.1773


In [13]:
!pip install xgboost lightgbm catboost sklearn-contrib-py-earth
# ╔════════════════════════════════════════════════════════════════╗
# ║  Models 21‑26 : ExtraTrees, GBM, XGB, LGBM, CatBoost, MARS     ║
# ╚════════════════════════════════════════════════════════════════╝

import numpy as np, pandas as pd, warnings
from sklearn.ensemble  import ExtraTreesRegressor, GradientBoostingRegressor
from xgboost           import XGBRegressor
import lightgbm        as lgb
from catboost          import CatBoostRegressor
try:
    from pyearth import Earth              # MARS implementation
except ImportError:
    Earth = None

warnings.filterwarnings("ignore")

# ---------------- numeric feature slices (already defined) ----------------
X_tr = X_train.select_dtypes(np.number)
X_va = X_valid.select_dtypes(np.number)

# ---------------- helper to capture & print metrics -----------------------
def _store(name, y_true, y_pred, model):
    MODELS[name]  = model
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<10s} →", {k: f"{v:.4f}" for k, v in METRICS[name].items()})

# --------------------------------------------------------------------------
# 21) Extra Trees Regressor
# --------------------------------------------------------------------------
et = ExtraTreesRegressor(
        n_estimators=400,
        max_depth=None,
        min_samples_split=2,
        n_jobs=-1,
        random_state=42)
et.fit(X_tr, y_train)
_store("extratrees", y_valid, et.predict(X_va), et)

# --------------------------------------------------------------------------
# 22) Gradient Boosting Regressor
# --------------------------------------------------------------------------
gbr = GradientBoostingRegressor(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=3,
        subsample=0.9,
        random_state=42)
gbr.fit(X_tr, y_train)
_store("gbr", y_valid, gbr.predict(X_va), gbr)

# --------------------------------------------------------------------------
# 23) XGBoost Regressor
# --------------------------------------------------------------------------
xgbm = XGBRegressor(
        n_estimators=800,
        eta=0.03,
        max_depth=6,
        subsample=0.9,
        colsample_bytree=0.8,
        objective="reg:squarederror",
        n_jobs=-1,
        random_state=42)
xgbm.fit(X_tr, y_train)
_store("xgb", y_valid, xgbm.predict(X_va), xgbm)

# --------------------------------------------------------------------------
# 24) LightGBM Regressor
# --------------------------------------------------------------------------
lgbm = lgb.LGBMRegressor(
        n_estimators=800,
        learning_rate=0.03,
        max_depth=-1,
        subsample=0.9,
        colsample_bytree=0.8,
        objective="regression",
        random_state=42,
        n_jobs=-1)
lgbm.fit(X_tr, y_train)
_store("lgb", y_valid, lgbm.predict(X_va), lgbm)

# --------------------------------------------------------------------------
# 25) CatBoost Regressor
# --------------------------------------------------------------------------
cat = CatBoostRegressor(
        iterations=600,
        learning_rate=0.05,
        depth=6,
        loss_function="RMSE",
        random_seed=42,
        verbose=False)
cat.fit(X_tr, y_train)
_store("cat", y_valid, cat.predict(X_va), cat)

# --------------------------------------------------------------------------
# 26) MARS / Earth
# --------------------------------------------------------------------------
if Earth is not None:
    mars = Earth(max_degree=2, penalty=2.0)   # basic config
    mars.fit(X_tr, y_train)
    _store("mars", y_valid, mars.predict(X_va), mars)
else:
    print("mars       ⚠️  skipped – install sklearn‑contrib‑py‑earth to enable")

# --------------------------------------------------------------------------
# Quick tabular summary for models 21‑26
# --------------------------------------------------------------------------
(pd.DataFrame({k:v for k,v in METRICS.items()
               if k in ["extratrees","gbr","xgb","lgb","cat","mars"]})
   .T.style.format({c: "{:.4f}" for c in ["mse","rmse","mae","r2"]}))

Collecting sklearn-contrib-py-earth
  Using cached sklearn-contrib-py-earth-0.1.0.tar.gz (1.0 MB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: sklearn-contrib-py-earth
[33m  DEPRECATION: Building 'sklearn-contrib-py-earth' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'sklearn-contrib-py-earth'. Discussion can be found at https://github.com/pypa/pip/issues/6334[0m[33m
[0m  Building wheel for sklearn-contrib-py-earth (setup.py) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[81 lines of o

Unnamed: 0,mse,rmse,mae,r2
extratrees,6465702.7244,2542.7746,1762.6069,-0.0031
gbr,5938403.1849,2436.8839,1619.8406,0.0787
xgb,6141106.4613,2478.1256,1685.0939,0.0473
lgb,7802720.2937,2793.335,1900.7409,-0.2105
cat,5909982.5695,2431.0456,1686.518,0.0831


In [14]:
# ╔════════════════════════════════════════════════════════════╗
# ║  Models 27‑32 : MLP, CNN, RNN, LSTM, GRU, CNN‑LSTM         ║
# ╚════════════════════════════════════════════════════════════╝
import numpy as np, tensorflow as tf, pandas as pd, warnings
from tensorflow.keras import layers, models, callbacks
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings("ignore")

# -----------------------------------------------------------------
# 1) Scale numeric features once, keep scaler for later inference
# -----------------------------------------------------------------
scaler = StandardScaler()
X_tr_np = scaler.fit_transform(X_train.select_dtypes(np.number)).astype("float32")
X_va_np = scaler.transform(X_valid.select_dtypes(np.number)).astype("float32")
y_tr_np = y_train.values.astype("float32")
y_va_np = y_valid.values.astype("float32")

n_feats = X_tr_np.shape[1]
seq_tr  = X_tr_np.reshape(-1, n_feats, 1)      # (samples, timesteps, channels)
seq_va  = X_va_np.reshape(-1, n_feats, 1)

early = callbacks.EarlyStopping(patience=5, restore_best_weights=True)

def _store(name, y_true, y_pred, model, scaler=scaler):
    MODELS[name]  = (scaler, model)           # tuple for later use
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<8} →", {k: f"{v:.4f}" for k,v in METRICS[name].items()})

# -----------------------------------------------------------------
# 27) MLP
# -----------------------------------------------------------------
mlp = models.Sequential([
    layers.Input((n_feats,)),
    layers.Dense(128, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(1)
])
mlp.compile("adam", "mse")
mlp.fit(X_tr_np, y_tr_np, validation_data=(X_va_np, y_va_np),
        epochs=40, batch_size=32, verbose=0, callbacks=[early])
_store("mlp", y_valid, mlp.predict(X_va_np, verbose=0).flatten(), mlp)

# -----------------------------------------------------------------
# 28) 1‑D CNN
# -----------------------------------------------------------------
cnn = models.Sequential([
    layers.Input((n_feats,1)),
    layers.Conv1D(32, 3, activation="relu"),
    layers.MaxPooling1D(2),
    layers.Conv1D(16, 3, activation="relu"),
    layers.GlobalAveragePooling1D(),
    layers.Dense(1)
])
cnn.compile("adam", "mse")
cnn.fit(seq_tr, y_tr_np, validation_data=(seq_va, y_va_np),
        epochs=40, batch_size=32, verbose=0, callbacks=[early])
_store("cnn", y_valid, cnn.predict(seq_va, verbose=0).flatten(), cnn)

# -----------------------------------------------------------------
# 29) Simple RNN
# -----------------------------------------------------------------
rnn = models.Sequential([
    layers.Input((n_feats,1)),
    layers.SimpleRNN(64),
    layers.Dense(1)
])
rnn.compile("adam", "mse")
rnn.fit(seq_tr, y_tr_np, validation_data=(seq_va, y_va_np),
        epochs=40, batch_size=32, verbose=0, callbacks=[early])
_store("rnn", y_valid, rnn.predict(seq_va, verbose=0).flatten(), rnn)

# -----------------------------------------------------------------
# 30) LSTM
# -----------------------------------------------------------------
lstm = models.Sequential([
    layers.Input((n_feats,1)),
    layers.LSTM(64),
    layers.Dense(1)
])
lstm.compile("adam", "mse")
lstm.fit(seq_tr, y_tr_np, validation_data=(seq_va, y_va_np),
         epochs=40, batch_size=32, verbose=0, callbacks=[early])
_store("lstm", y_valid, lstm.predict(seq_va, verbose=0).flatten(), lstm)

# -----------------------------------------------------------------
# 31) GRU
# -----------------------------------------------------------------
gru = models.Sequential([
    layers.Input((n_feats,1)),
    layers.GRU(64),
    layers.Dense(1)
])
gru.compile("adam", "mse")
gru.fit(seq_tr, y_tr_np, validation_data=(seq_va, y_va_np),
        epochs=40, batch_size=32, verbose=0, callbacks=[early])
_store("gru", y_valid, gru.predict(seq_va, verbose=0).flatten(), gru)

# -----------------------------------------------------------------
# 32) CNN‑LSTM hybrid
# -----------------------------------------------------------------
cnn_lstm = models.Sequential([
    layers.Input((n_feats,1)),
    layers.Conv1D(32, 3, activation="relu"),
    layers.MaxPooling1D(2),
    layers.LSTM(32),
    layers.Dense(1)
])
cnn_lstm.compile("adam", "mse")
cnn_lstm.fit(seq_tr, y_tr_np, validation_data=(seq_va, y_va_np),
             epochs=40, batch_size=32, verbose=0, callbacks=[early])
_store("cnn_lstm", y_valid, cnn_lstm.predict(seq_va, verbose=0).flatten(), cnn_lstm)

# -----------------------------------------------------------------
# Summary table for 27‑32
# -----------------------------------------------------------------
(pd.DataFrame({k:v for k,v in METRICS.items()
               if k in ["mlp","cnn","rnn","lstm","gru","cnn_lstm"]})
   .T.style.format({c:"{:.4f}" for c in ["mse","rmse","mae","r2"]}))

mlp      → {'mse': '6599214.6867', 'rmse': '2568.8937', 'mae': '1838.4512', 'r2': '-0.0238'}
cnn      → {'mse': '6598449.3013', 'rmse': '2568.7447', 'mae': '1838.2647', 'r2': '-0.0237'}
rnn      → {'mse': '6597679.6455', 'rmse': '2568.5949', 'mae': '1838.2439', 'r2': '-0.0235'}
lstm     → {'mse': '6598126.1046', 'rmse': '2568.6818', 'mae': '1838.2400', 'r2': '-0.0236'}
gru      → {'mse': '6598059.7938', 'rmse': '2568.6689', 'mae': '1838.2763', 'r2': '-0.0236'}
cnn_lstm → {'mse': '6598518.4748', 'rmse': '2568.7582', 'mae': '1838.3151', 'r2': '-0.0237'}


Unnamed: 0,mse,rmse,mae,r2
mlp,6599214.6867,2568.8937,1838.4512,-0.0238
cnn,6598449.3013,2568.7447,1838.2647,-0.0237
rnn,6597679.6455,2568.5949,1838.2439,-0.0235
lstm,6598126.1046,2568.6818,1838.24,-0.0236
gru,6598059.7938,2568.6689,1838.2763,-0.0236
cnn_lstm,6598518.4748,2568.7582,1838.3151,-0.0237


In [None]:
!pip install keras-tcn darts gluonts tensorflow-probability

# ╔═════════════════════════════════════════════════════════════════════╗
# ║  Models 33‑39 : TCN, Transformer, N‑BEATS, DeepAR, STS, Seq2Seq, GP ║
# ╚═════════════════════════════════════════════════════════════════════╝
import numpy as np, pandas as pd, warnings, tensorflow as tf
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
warnings.filterwarnings("ignore")

# ───────────────────────────────────────────────
# Reuse numeric arrays & scaler from 27‑32 block
# (scaler, X_tr_np, X_va_np, seq_tr, seq_va, n_feats)
# ───────────────────────────────────────────────

def _store(name, y_true, y_pred, model, scaler=scaler):
    MODELS[name]  = (scaler, model)
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<12} →", {k: f"{v:.4f}" for k,v in METRICS[name].items()})

early = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

# ----------------------------------------------------------------------
# 33) TCN (keras‑tcn) ---------------------------------------------------
# ----------------------------------------------------------------------
try:
    from tcn import TCN
    tcn = tf.keras.Sequential([
        tf.keras.layers.Input((n_feats,1)),
        TCN(nb_filters=32, kernel_size=3, dilations=[1,2,4,8]),
        tf.keras.layers.Dense(1)
    ])
    tcn.compile("adam", "mse")
    tcn.fit(seq_tr, y_tr_np, validation_data=(seq_va, y_va_np),
            epochs=40, batch_size=32, verbose=0, callbacks=[early])
    _store("tcn", y_valid, tcn.predict(seq_va, verbose=0).flatten(), tcn)
except ImportError:
    print("tcn          ⚠️  skipped – install keras‑tcn to enable")

# ----------------------------------------------------------------------
# 34) Transformer Encoder (1 block) ------------------------------------
# ----------------------------------------------------------------------
def transformer_block(x, heads=4, d_model=64):
    attn_out = tf.keras.layers.MultiHeadAttention(num_heads=heads, key_dim=d_model)(x, x)
    x = tf.keras.layers.Add()([x, attn_out])
    x = tf.keras.layers.LayerNormalization()(x)
    ff = tf.keras.layers.Dense(d_model*4, activation="relu")(x)
    ff = tf.keras.layers.Dense(d_model)(ff)
    x = tf.keras.layers.Add()([x, ff])
    return tf.keras.layers.LayerNormalization()(x)

inp = tf.keras.Input((n_feats,1))
x   = transformer_block(inp, heads=4, d_model=32)
x   = tf.keras.layers.GlobalAveragePooling1D()(x)
out = tf.keras.layers.Dense(1)(x)
transf = tf.keras.Model(inp, out)
transf.compile("adam", "mse")
transf.fit(seq_tr, y_tr_np, validation_data=(seq_va, y_va_np),
           epochs=40, batch_size=32, verbose=0, callbacks=[early])
_store("transformer", y_valid, transf.predict(seq_va, verbose=0).flatten(), transf)

# ----------------------------------------------------------------------
# 35) N‑BEATS  (via darts) ---------------------------------------------
# ----------------------------------------------------------------------
try:
    from darts.models import NBEATSModel
    from darts import TimeSeries
    # Darts expects a univariate series ➜ use TARGET only
    ts_train = TimeSeries.from_series(train_df[TARGET])
    ts_val   = TimeSeries.from_series(valid_df[TARGET])

    nbeats = NBEATSModel(
        input_chunk_length=32,
        output_chunk_length=1,
        random_state=42,
        n_epochs=50,
        batch_size=32,
        pl_trainer_kwargs={"accelerator":"cpu", "enable_progress_bar":False}
    )
    nbeats.fit(ts_train, verbose=False)
    nbeats_pred = nbeats.predict(n=len(valid_df)).values().flatten()
    _store("nbeats", y_valid, nbeats_pred, nbeats)
except ImportError:
    print("nbeats       ⚠️  skipped – install darts to enable")

# ----------------------------------------------------------------------
# 36) DeepAR (gluonts) --------------------------------------------------
# ----------------------------------------------------------------------
try:
    from gluonts.mx.model.deepar import DeepAREstimator
    from gluonts.dataset.common import ListDataset
    import mxnet as mx
    deepar_train = ListDataset([{"start": str(train_df.index[0]), "target": train_df[TARGET].values}], freq="D")
    deepar_estim = DeepAREstimator(freq="D", prediction_length=1, context_length=30,
                                   trainer=mx.gluon.Trainer(epochs=20, ctx="cpu"))
    deepar_pred  = deepar_estim.train(deepar_train).predict(deepar_train)
    pred_vals    = list(deepar_pred)[0].samples.mean(axis=0)[-len(valid_df):]
    _store("deepar", y_valid, pred_vals, deepar_estim)
except ImportError:
    print("deepar       ⚠️  skipped – install gluonts to enable")

# ----------------------------------------------------------------------
# 37) Deep State‑Space   (TF‑Probability STS) --------------------------
# ----------------------------------------------------------------------
try:
    import tensorflow_probability as tfp
    tfd = tfp.distributions
    sts = tfp.sts
    level   = sts.LocalLevel(observed_time_series=y_tr_np)
    model_sts = sts.Sum([level], observed_time_series=y_tr_np)
    variational_post = tfp.sts.build_factored_surrogate_posterior(model_sts)
    _ = tfp.vi.fit_surrogate_posterior(model_sts, variational_post,
                                       optimizer=tf.optimizers.Adam(0.1),
                                       num_steps=200, sample_size=32, verbose=0)
    forecast_dist = tfp.sts.forecast(model_sts, variational_post,
                                     num_steps_forecast=len(valid_df))
    sts_pred = forecast_dist.mean().numpy().flatten()
    _store("sts", y_valid, sts_pred, (model_sts, variational_post))
except ImportError:
    print("sts          ⚠️  skipped – install tensorflow‑probability to enable")

# ----------------------------------------------------------------------
# 38) Seq2Seq LSTM  (encoder‑decoder) ----------------------------------
# ----------------------------------------------------------------------
enc_in = tf.keras.Input((n_feats,1))
enc_out, enc_state_h, enc_state_c = tf.keras.layers.LSTM(64, return_state=True)(enc_in)
enc_states = [enc_state_h, enc_state_c]

dec_in = tf.keras.Input((1,1))                       # start token (zeros)
dec_lstm = tf.keras.layers.LSTM(64, return_sequences=True, return_state=True)
dec_out, _, _ = dec_lstm(dec_in, initial_state=enc_states)
dec_out = tf.keras.layers.Dense(1)(dec_out)
seq2seq  = tf.keras.Model([enc_in, dec_in], dec_out[:,0,:])

seq2seq.compile("adam", "mse")
dummy_dec_tr = np.zeros((len(seq_tr),1,1))
seq2seq.fit([seq_tr, dummy_dec_tr], y_tr_np,
            validation_split=0.1, epochs=40, batch_size=32,
            verbose=0, callbacks=[early])

dummy_dec_va = np.zeros((len(seq_va),1,1))
_store("seq2seq", y_valid,
       seq2seq.predict([seq_va, dummy_dec_va], verbose=0).flatten(),
       seq2seq)

# ----------------------------------------------------------------------
# 39) Gaussian Process Regressor ---------------------------------------
kernel = C(1.0, (1e-3, 1e3)) * RBF(length_scale=10.0)
gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-2, n_restarts_optimizer=3, random_state=42)
gpr.fit(X_tr_np, y_tr_np)
_store("gpr", y_valid, gpr.predict(X_va_np), gpr)

# ----------------------------------------------------------------------
# Summary for 33‑39 (only those that ran)
# ----------------------------------------------------------------------
(pd.DataFrame({k:v for k,v in METRICS.items()
               if k in ["tcn","transformer","nbeats","deepar","sts","seq2seq","gpr"]})
   .T.style.format({c: "{:.4f}" for c in ["mse","rmse","mae","r2"]}))

Collecting darts
  Downloading darts-0.35.0-py3-none-any.whl.metadata (56 kB)
Collecting gluonts
  Downloading gluonts-0.16.1-py3-none-any.whl.metadata (9.8 kB)
Collecting holidays>=0.11.1 (from darts)
  Downloading holidays-0.73-py3-none-any.whl.metadata (38 kB)
Collecting nfoursid>=1.0.0 (from darts)
  Downloading nfoursid-1.0.1-py3-none-any.whl.metadata (1.9 kB)
Collecting pyod>=0.9.5 (from darts)
  Downloading pyod-2.0.5-py3-none-any.whl.metadata (46 kB)
Collecting shap>=0.40.0 (from darts)
  Downloading shap-0.47.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting statsforecast>=1.4 (from darts)
  Downloading statsforecast-2.0.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting xarray>=0.17.0 (from darts)
  Downloading xarray-2025.4.0-py3-none-any.whl.metadata (12 kB)
Collecting pytorch-lightning>=1.5.0 (from darts)
  Downloading pytorch_lightning-2.5.1.post0-py3-none-any.whl.metadata (20 kB)
Collecting tensorboardX>=2.1 (from darts)
  Downloading tensorboa

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
# ╔═════════════════════════════════════════════════════════════════════════╗
# ║  Models 40‑50 : Bayesian NN, QR‑Forest, MC‑Dropout, AEs, PCR, KPCR,     ║
# ║  Stacking, RL placeholders (47‑49), Stacking‑Blend (50)                ║
# ╚═════════════════════════════════════════════════════════════════════════╝
import numpy as np, pandas as pd, warnings, tensorflow as tf
from sklearn.pipeline        import Pipeline
from sklearn.decomposition   import PCA, KernelPCA
from sklearn.linear_model    import LinearRegression, RidgeCV
from sklearn.ensemble        import GradientBoostingRegressor, RandomForestRegressor, StackingRegressor
warnings.filterwarnings("ignore")

# numeric arrays & scaler from previous deep‑learning cell -----------------
# scaler, X_tr_np, X_va_np, y_tr_np, y_va_np, n_feats

def _store(name, y_true, y_pred, model, scaler=scaler):
    MODELS[name]  = (scaler, model)
    METRICS[name] = regression_metrics(y_true, y_pred)
    print(f"{name:<14s} →", {k: f"{v:.4f}" for k,v in METRICS[name].items()})

early = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

# 40) Bayesian Neural Network (DenseFlipout) -------------------------------
try:
    import tensorflow_probability as tfp
    tfd = tfp.distributions
    bnn = tf.keras.Sequential([
        tf.keras.layers.Input((n_feats,)),
        tfp.layers.DenseFlipout(64, activation="relu"),
        tfp.layers.DenseFlipout(1)
    ])
    negloglik = lambda y, rv_y: -rv_y.log_prob(y)
    bnn.compile(tf.optimizers.Adam(0.01), negloglik)
    bnn.fit(X_tr_np, y_tr_np, epochs=60, batch_size=32,
            validation_data=(X_va_np, y_va_np), verbose=0, callbacks=[early])
    pred_dist = bnn(X_va_np)            # distribution
    _store("bayes_nn", y_valid, pred_dist.mean().numpy().flatten(), bnn)
except ImportError:
    print("bayes_nn       ⚠️  skipped – install tensorflow‑probability")

# 41) Quantile Regression Forest (GradientBoosting with 'quantile') --------
qrf = GradientBoostingRegressor(loss="quantile", alpha=0.5,
                                n_estimators=400, learning_rate=0.05,
                                max_depth=3, random_state=42)
qrf.fit(X_train.select_dtypes(np.number), y_train)
_store("qrf", y_valid, qrf.predict(X_valid.select_dtypes(np.number)), qrf)

# 42) Monte‑Carlo Dropout Ensemble (train once, predict many) --------------
mcd = tf.keras.Sequential([
    tf.keras.layers.Input((n_feats,)),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1)
])
mcd.compile("adam", "mse")
mcd.fit(X_tr_np, y_tr_np, epochs=40, batch_size=32,
        validation_data=(X_va_np, y_va_np), verbose=0, callbacks=[early])

# mean of 30 stochastic forward passes
mc_preds = np.mean([mcd(X_va_np, training=True).numpy().flatten()
                    for _ in range(30)], axis=0)
_store("mc_dropout", y_valid, mc_preds, mcd)

# 43) Autoencoder bottleneck + Linear regression ---------------------------
enc = tf.keras.Sequential([
    tf.keras.layers.Input((n_feats,)),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(16, activation="relu", name="bottleneck"),
    tf.keras.layers.Dense(n_feats)
])
enc.compile("adam", "mse")
enc.fit(X_tr_np, X_tr_np, epochs=40, batch_size=32,
        validation_split=0.1, verbose=0, callbacks=[early])

bottleneck_model = tf.keras.Model(enc.input, enc.get_layer("bottleneck").output)
Z_tr, Z_va = bottleneck_model(X_tr_np).numpy(), bottleneck_model(X_va_np).numpy()
lin = LinearRegression().fit(Z_tr, y_tr_np)
_store("autoenc_lin", y_valid, lin.predict(Z_va), (enc, lin))

# 44) Variational Autoencoder + Ridge head ---------------------------------
latent = 8
inputs = tf.keras.Input((n_feats,))
h      = tf.keras.layers.Dense(32, activation="relu")(inputs)
z_mean = tf.keras.layers.Dense(latent)(h)
z_logv = tf.keras.layers.Dense(latent)(h)
eps    = tf.keras.backend.random_normal(shape=(tf.shape(z_mean)[0], latent))
z      = z_mean + tf.exp(0.5 * z_logv) * eps
decoder= tf.keras.Sequential([
    tf.keras.layers.Input((latent,)),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(n_feats)
])
vae    = tf.keras.Model(inputs, decoder(z))
kl_loss= -0.5 * tf.reduce_mean(1 + z_logv - tf.square(z_mean) - tf.exp(z_logv))
vae.add_loss(kl_loss)
vae.compile("adam", "mse")
vae.fit(X_tr_np, X_tr_np, epochs=40, batch_size=32,
        validation_split=0.1, verbose=0, callbacks=[early])
Z_tr_v = z_mean.numpy(); Z_va_v = tf.keras.Model(inputs, z_mean)(X_va_np).numpy()
ridge  = RidgeCV(alphas=np.logspace(-3,1,20)).fit(Z_tr_v, y_tr_np)
_store("vae_ridge", y_valid, ridge.predict(Z_va_v), (vae, ridge))

# 45) Principal Component Regression (PCR) ---------------------------------
pcr_pipe = Pipeline([
    ("pca",   PCA(n_components=0.95)),
    ("lin",   LinearRegression())
])
pcr_pipe.fit(X_train.select_dtypes(np.number), y_train)
_store("pcr", y_valid, pcr_pipe.predict(X_valid.select_dtypes(np.number)), pcr_pipe)

# 46) Kernel PCA + Ridge Regression ----------------------------------------
kpcr_pipe = Pipeline([
    ("kpca",  KernelPCA(n_components=50, kernel="rbf", fit_inverse_transform=True)),
    ("ridge", RidgeCV(alphas=np.logspace(-3,1,20)))
])
kpcr_pipe.fit(X_train.select_dtypes(np.number), y_train)
_store("kpcr", y_valid, kpcr_pipe.predict(X_valid.select_dtypes(np.number)), kpcr_pipe)

# 47‑49) RL models (Q‑learning / PPO / Actor‑Critic) -----------------------
print("rl_baselines  ⚠️  skipped – full RL training out of scope for notebook demo")

# 50) Simple Stacking Blend of top 3 linear/boosting models ----------------
stack = StackingRegressor(
    estimators=[
        ("ridge", MODELS["ridge"][1] if "ridge" in MODELS else RidgeCV()),
        ("gbr",   MODELS["gbr"]),
        ("lgb",   MODELS["lgb"])
    ],
    final_estimator=LinearRegression(),
    passthrough=True,
    n_jobs=-1
)
stack.fit(X_train.select_dtypes(np.number), y_train)
_store("stack", y_valid, stack.predict(X_valid.select_dtypes(np.number)), stack)

# -------------------------------------------------------------------------
# Show metrics for 40‑50 entries that actually ran
# -------------------------------------------------------------------------
(pd.DataFrame({k:v for k,v in METRICS.items()
               if k in ["bayes_nn","qrf","mc_dropout","autoenc_lin","vae_ridge",
                        "pcr","kpcr","stack"]})
   .T.style.format({c:"{:.4f}" for c in ["mse","rmse","mae","r2"]}))

In [None]:
# ╔═════════  Model — LightGBM  ═════════╗
from lightgbm import LGBMRegressor as ESTIMATOR

def search_space(t):
    return {
        "n_estimators": t.suggest_int("n_est", 300, 900, 300),
        "learning_rate": t.suggest_float("lr", 0.01, 0.2, log=True),
        "max_depth": t.suggest_int("max_d", 3, 8),
    }

study = optuna.create_study(direction="minimize")
study.optimize(lambda tr: safe_cv_score(ESTIMATOR, search_space(tr),
                                        X_train_n, y_train),
               n_trials=15, show_progress_bar=False)
best = search_space(study.best_trial)

lgb_model = Pipeline([("sc", StandardScaler()),
                      ("m", ESTIMATOR(**best))]).fit(X_train_n, y_train)
MODELS["lgb"] = lgb_model
METRICS["lgb"] = reg_metrics(y_valid, lgb_model.predict(X_valid_n))
print("✓ LGBM", METRICS["lgb"])

[I 2025-05-27 23:47:19,886] A new study created in memory with name: no-name-c560ff85-a9e4-493e-a67a-aeb1e10f3b6d


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000215 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:21,607] Trial 0 finished with value: 2174.2683966022887 and parameters: {'n_est': 900, 'lr': 0.1842333240167576, 'max_d': 8}. Best is trial 0 with value: 2174.2683966022887.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000161 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:22,163] Trial 1 finished with value: 1919.2990901526573 and parameters: {'n_est': 300, 'lr': 0.06860668785542172, 'max_d': 7}. Best is trial 1 with value: 1919.2990901526573.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000114 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:23,770] Trial 2 finished with value: 2024.2806255386797 and parameters: {'n_est': 900, 'lr': 0.06992815027904678, 'max_d': 6}. Best is trial 1 with value: 1919.2990901526573.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:24,309] Trial 3 finished with value: 1862.149272355107 and parameters: {'n_est': 300, 'lr': 0.03429142300889541, 'max_d': 7}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000167 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:25,828] Trial 4 finished with value: 1868.3311410677584 and parameters: {'n_est': 900, 'lr': 0.012779244382924753, 'max_d': 5}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000112 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:26,436] Trial 5 finished with value: 1869.0200310123057 and parameters: {'n_est': 300, 'lr': 0.036737740287976196, 'max_d': 5}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:27,562] Trial 6 finished with value: 2094.435733792533 and parameters: {'n_est': 600, 'lr': 0.1249708838829165, 'max_d': 8}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000084 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162

[I 2025-05-27 23:47:28,125] Trial 7 finished with value: 1898.1191227760237 and parameters: {'n_est': 300, 'lr': 0.06346259077311225, 'max_d': 6}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000142 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000145 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:29,150] Trial 8 finished with value: 2100.12585976222 and parameters: {'n_est': 600, 'lr': 0.18193848269087667, 'max_d': 6}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000072 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162

[I 2025-05-27 23:47:29,586] Trial 9 finished with value: 1988.0202480183189 and parameters: {'n_est': 300, 'lr': 0.12003888606207462, 'max_d': 3}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:30,364] Trial 10 finished with value: 1889.0128711362981 and parameters: {'n_est': 600, 'lr': 0.022596286869435423, 'max_d': 3}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:31,745] Trial 11 finished with value: 1877.9212889001126 and parameters: {'n_est': 900, 'lr': 0.01194179408703706, 'max_d': 4}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000151 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:33,341] Trial 12 finished with value: 1866.2379626085458 and parameters: {'n_est': 900, 'lr': 0.011792288087140224, 'max_d': 5}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000122 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:34,678] Trial 13 finished with value: 1873.8399829288633 and parameters: {'n_est': 600, 'lr': 0.022412563013406783, 'max_d': 7}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -4.650882
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 17.390371
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training f

[I 2025-05-27 23:47:35,941] Trial 14 finished with value: 1879.0393305894477 and parameters: {'n_est': 600, 'lr': 0.022375216088487734, 'max_d': 7}. Best is trial 3 with value: 1862.149272355107.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1910
[LightGBM] [Info] Number of data points in the train set: 268, number of used features: 22
[LightGBM] [Info] Start training from score 158.400861
✓ LGBM {'mse': 7038284.988940356, 'rmse': np.float64(2652.976628042614)}


In [None]:
# ╔═════════  Model — CatBoost  ═════════╗
from catboost import CatBoostRegressor as ESTIMATOR

def search_space(t):
    return {
        "iterations": t.suggest_int("it", 300, 800, 250),
        "depth": t.suggest_int("depth", 4, 8),
        "learning_rate": t.suggest_float("lr", 5e-3, 0.2, log=True),
        "loss_function": "RMSE",
        "verbose": 0,
    }

study = optuna.create_study(direction="minimize")
study.optimize(lambda tr: safe_cv_score(ESTIMATOR, search_space(tr),
                                        X_train_n, y_train),
               n_trials=10, show_progress_bar=False)
best = search_space(study.best_trial)

cat_model = Pipeline([("sc", StandardScaler()),
                      ("m", ESTIMATOR(**best))]).fit(X_train_n, y_train)
MODELS["cat"] = cat_model
METRICS["cat"] = reg_metrics(y_valid, cat_model.predict(X_valid_n))
print("✓ CatBoost", METRICS["cat"])

[I 2025-05-27 23:47:36,374] A new study created in memory with name: no-name-fd6393ff-7647-4747-8352-cd9f7f581223
[I 2025-05-27 23:47:37,866] Trial 0 finished with value: 1781.8511912279191 and parameters: {'it': 300, 'depth': 7, 'lr': 0.08334107393523266}. Best is trial 0 with value: 1781.8511912279191.
[I 2025-05-27 23:47:38,707] Trial 1 finished with value: 1816.0377821469374 and parameters: {'it': 550, 'depth': 4, 'lr': 0.011556241712229963}. Best is trial 0 with value: 1781.8511912279191.
[I 2025-05-27 23:47:39,593] Trial 2 finished with value: 1874.9480605223873 and parameters: {'it': 550, 'depth': 5, 'lr': 0.006251396775245591}. Best is trial 0 with value: 1781.8511912279191.
[I 2025-05-27 23:47:40,677] Trial 3 finished with value: 1925.6250763967196 and parameters: {'it': 300, 'depth': 7, 'lr': 0.00509628608169575}. Best is trial 0 with value: 1781.8511912279191.
[I 2025-05-27 23:47:41,294] Trial 4 finished with value: 1739.8923422929538 and parameters: {'it': 300, 'depth': 5, 

✓ CatBoost {'mse': 5755352.754685604, 'rmse': np.float64(2399.031628529646)}


In [None]:
# ╔═════════  Model — XGBoost  ═════════╗
from xgboost import XGBRegressor as ESTIMATOR

def search_space(t):
    return {
        "n_estimators": t.suggest_int("n_est", 300, 900, 300),
        "eta": t.suggest_float("eta", 0.01, 0.3, log=True),
        "max_depth": t.suggest_int("max_d", 3, 8),
        "subsample": 0.8,
        "objective": "reg:squarederror",
    }

study = optuna.create_study(direction="minimize")
study.optimize(lambda tr: safe_cv_score(ESTIMATOR, search_space(tr),
                                        X_train_n, y_train),
               n_trials=15, show_progress_bar=False)
best = search_space(study.best_trial)

xgb_model = Pipeline([("sc", StandardScaler()),
                      ("m", ESTIMATOR(**best))]).fit(X_train_n, y_train)
MODELS["xgb"] = xgb_model
METRICS["xgb"] = reg_metrics(y_valid, xgb_model.predict(X_valid_n))
print("✓ XGB", METRICS["xgb"])

[I 2025-05-27 23:47:52,897] A new study created in memory with name: no-name-a1a02b12-c0c6-4cc8-88cc-43b96b1a37d5
[I 2025-05-27 23:47:53,669] Trial 0 finished with value: 1807.7120406198128 and parameters: {'n_est': 300, 'eta': 0.1248302232632442, 'max_d': 3}. Best is trial 0 with value: 1807.7120406198128.
[I 2025-05-27 23:47:55,140] Trial 1 finished with value: 2028.742297030135 and parameters: {'n_est': 300, 'eta': 0.17257556085640552, 'max_d': 6}. Best is trial 0 with value: 1807.7120406198128.
[I 2025-05-27 23:47:58,321] Trial 2 finished with value: 1766.0407214051993 and parameters: {'n_est': 900, 'eta': 0.0590699617539345, 'max_d': 4}. Best is trial 2 with value: 1766.0407214051993.
[I 2025-05-27 23:47:59,062] Trial 3 finished with value: 1738.1725895291652 and parameters: {'n_est': 300, 'eta': 0.19821416000063105, 'max_d': 3}. Best is trial 3 with value: 1738.1725895291652.
[I 2025-05-27 23:48:00,540] Trial 4 finished with value: 2035.1429747106222 and parameters: {'n_est': 300

✓ XGB {'mse': 5784561.788921852, 'rmse': np.float64(2405.111595939334)}


In [None]:
# ╔═════════  Model — GARCH  ═════════╗
ESTIMATOR = GARCHReg

def search_space(t):
    return {"p": t.suggest_int("p", 1, 2),
            "q": t.suggest_int("q", 1, 2)}

study = optuna.create_study(direction="minimize")
study.optimize(lambda tr: safe_cv_score(ESTIMATOR, search_space(tr),
                                        X_train_n, y_train),
               n_trials=6, show_progress_bar=False)
best = search_space(study.best_trial)

garch_model = ESTIMATOR(**best).fit(X_train_n, y_train)
MODELS["garch"] = garch_model
METRICS["garch"] = reg_metrics(y_valid, garch_model.predict(X_valid_n))
print("✓ GARCH", METRICS["garch"])

[I 2025-05-27 23:48:25,892] A new study created in memory with name: no-name-9af4523d-3e52-45a4-8f42-8bce1a721b3a
[I 2025-05-27 23:48:25,927] Trial 0 finished with value: 1967.616097140397 and parameters: {'p': 2, 'q': 1}. Best is trial 0 with value: 1967.616097140397.
[I 2025-05-27 23:48:25,956] Trial 1 finished with value: 1967.616097140397 and parameters: {'p': 2, 'q': 1}. Best is trial 0 with value: 1967.616097140397.
[I 2025-05-27 23:48:25,989] Trial 2 finished with value: 1967.616097140397 and parameters: {'p': 2, 'q': 2}. Best is trial 0 with value: 1967.616097140397.
[I 2025-05-27 23:48:26,017] Trial 3 finished with value: 1967.616097140397 and parameters: {'p': 2, 'q': 1}. Best is trial 0 with value: 1967.616097140397.
[I 2025-05-27 23:48:26,050] Trial 4 finished with value: 1967.616097140397 and parameters: {'p': 2, 'q': 2}. Best is trial 0 with value: 1967.616097140397.
[I 2025-05-27 23:48:26,071] Trial 5 finished with value: 1967.616097140397 and parameters: {'p': 1, 'q': 1

✓ GARCH {'mse': 6598262.758982091, 'rmse': np.float64(2568.708383406355)}


In [None]:
# ╔═════════  Model — MLP (Keras)  ═════════╗
from scikeras.wrappers import KerasRegressor as ESTIMATOR
n_feat = X_train_n.shape[1]

def search_space(t):
    hid = t.suggest_int("hid", 32, 128, 32)
    lay = t.suggest_int("lay", 1, 3)
    return {
        "model": lambda: build_mlp(n_feat, hid, lay),
        "epochs": 20,
        "batch_size": 32,
        "verbose": 0
    }

# 1) Tune
study = optuna.create_study(direction="minimize")
study.optimize(
    lambda tr: safe_cv_score(ESTIMATOR, search_space(tr), X_train_n, y_train),
    n_trials=10,
    show_progress_bar=False
)
best = search_space(study.best_trial)
best.update(epochs=50)  # final fit longer

# 2) Fit pipeline
mlp_pipeline = Pipeline([
    ("sc", StandardScaler()),
    ("m",  ESTIMATOR(**best))
])
mlp_pipeline.fit(X_train_n, y_train)

# 3) Manual predict to sidestep sklearn-tags bug
scaler = mlp_pipeline.named_steps["sc"]
kr     = mlp_pipeline.named_steps["m"]
Xv_s   = scaler.transform(X_valid_n)
preds  = kr.predict(Xv_s)

MODELS["mlp"]  = mlp_pipeline
METRICS["mlp"] = reg_metrics(y_valid, preds)
print("✓ MLP", METRICS["mlp"])

[I 2025-05-27 23:48:26,085] A new study created in memory with name: no-name-3d3ed0dc-7754-4cfb-bad9-04e0482edaff
[I 2025-05-27 23:48:27,101] Trial 0 finished with value: 1000000000000.0 and parameters: {'hid': 32, 'lay': 1}. Best is trial 0 with value: 1000000000000.0.
[I 2025-05-27 23:48:27,693] Trial 1 finished with value: 1000000000000.0 and parameters: {'hid': 128, 'lay': 1}. Best is trial 0 with value: 1000000000000.0.
[I 2025-05-27 23:48:28,271] Trial 2 finished with value: 1000000000000.0 and parameters: {'hid': 96, 'lay': 1}. Best is trial 0 with value: 1000000000000.0.
[I 2025-05-27 23:48:28,848] Trial 3 finished with value: 1000000000000.0 and parameters: {'hid': 64, 'lay': 1}. Best is trial 0 with value: 1000000000000.0.
[I 2025-05-27 23:48:30,183] Trial 4 finished with value: 1000000000000.0 and parameters: {'hid': 96, 'lay': 1}. Best is trial 0 with value: 1000000000000.0.
[I 2025-05-27 23:48:30,868] Trial 5 finished with value: 1000000000000.0 and parameters: {'hid': 64,

✓ MLP {'mse': 6597489.291249107, 'rmse': np.float64(2568.557823224758)}


In [None]:
# ╔══════  Model — HuberRegressor  ══════╗
from sklearn.linear_model import HuberRegressor as ESTIMATOR

def search_space(trial):
    return {
        "alpha":   trial.suggest_float("alpha", 1e-5, 1e-1, log=True),
        "epsilon": trial.suggest_float("eps",   1.1,   2.0),
    }

study = optuna.create_study(direction="minimize")
study.optimize(
    lambda t: safe_cv_score(ESTIMATOR, search_space(t), X_train_n, y_train),
    n_trials=15, show_progress_bar=False
)
best = search_space(study.best_trial)

hub_model = Pipeline([
    ("sc", StandardScaler()),
    ("m",  ESTIMATOR(**best))
]).fit(X_train_n, y_train)

MODELS["huber"]  = hub_model
METRICS["huber"] = reg_metrics(y_valid, hub_model.predict(X_valid_n))
print("✓ HuberRegressor", METRICS["huber"])

[I 2025-05-27 23:48:35,008] A new study created in memory with name: no-name-21a56f6e-0f53-4597-8b9c-4a1e4fbffd1a
[I 2025-05-27 23:48:35,065] Trial 0 finished with value: 1867.6214867154563 and parameters: {'alpha': 0.08732883102574136, 'eps': 1.8759803783150082}. Best is trial 0 with value: 1867.6214867154563.
[I 2025-05-27 23:48:35,081] Trial 1 finished with value: 1743.1731325031333 and parameters: {'alpha': 0.03653208119787995, 'eps': 1.1323586321125785}. Best is trial 1 with value: 1743.1731325031333.
[I 2025-05-27 23:48:35,095] Trial 2 finished with value: 1817.066555451247 and parameters: {'alpha': 0.06244458400073522, 'eps': 1.833719756753466}. Best is trial 1 with value: 1743.1731325031333.
[I 2025-05-27 23:48:35,133] Trial 3 finished with value: 165.1116901809786 and parameters: {'alpha': 0.001040702955694006, 'eps': 1.7749767612476368}. Best is trial 3 with value: 165.1116901809786.
[I 2025-05-27 23:48:35,172] Trial 4 finished with value: 106.52095120552285 and parameters: {

✓ HuberRegressor {'mse': 7354609.163414213, 'rmse': np.float64(2711.9382668885023)}


In [None]:
# ╔══════  Model — RANSACRegressor  ══════╗
from sklearn.linear_model import RANSACRegressor as ESTIMATOR

# minimal tuning—mostly use defaults
def search_space(trial):
    return {
        "min_samples": trial.suggest_float("min_smpl", 0.5, 0.9),
        "residual_threshold": trial.suggest_float("thr", 1e2, 1e4, log=True)
    }

study = optuna.create_study(direction="minimize")
study.optimize(
    lambda t: safe_cv_score(ESTIMATOR, search_space(t), X_train_n, y_train),
    n_trials=10, show_progress_bar=False
)
best = search_space(study.best_trial)

ras_model = Pipeline([
    ("sc", StandardScaler()),
    ("m",  ESTIMATOR(**best))
]).fit(X_train_n, y_train)

MODELS["ransac"]  = ras_model
METRICS["ransac"] = reg_metrics(y_valid, ras_model.predict(X_valid_n))
print("✓ RANSACRegressor", METRICS["ransac"])

[I 2025-05-27 23:48:35,511] A new study created in memory with name: no-name-a9a757a8-3226-4774-b66f-0a52dc36b256
[I 2025-05-27 23:48:35,586] Trial 0 finished with value: 3.7760265293367707e-11 and parameters: {'min_smpl': 0.6393284046420378, 'thr': 1121.4383981927347}. Best is trial 0 with value: 3.7760265293367707e-11.
[I 2025-05-27 23:48:35,598] Trial 1 finished with value: 3.7760265293367707e-11 and parameters: {'min_smpl': 0.6666228701720301, 'thr': 719.5973571199643}. Best is trial 0 with value: 3.7760265293367707e-11.
[I 2025-05-27 23:48:35,609] Trial 2 finished with value: 3.7760265293367707e-11 and parameters: {'min_smpl': 0.6218272272518601, 'thr': 989.2207513826997}. Best is trial 0 with value: 3.7760265293367707e-11.
[I 2025-05-27 23:48:35,622] Trial 3 finished with value: 3.7760265293367707e-11 and parameters: {'min_smpl': 0.7442020741710073, 'thr': 563.0723066811651}. Best is trial 0 with value: 3.7760265293367707e-11.
[I 2025-05-27 23:48:35,634] Trial 4 finished with val

✓ RANSACRegressor {'mse': 3.833144617590385e-21, 'rmse': np.float64(6.19123947008221e-11)}


In [None]:
# ╔══════  Model — QuantileRegressor  ══════╗
from sklearn.linear_model import QuantileRegressor as ESTIMATOR

# Predict the median (quantile=0.5), tune regularization
def search_space(trial):
    return {
        "quantile": 0.5,
        "alpha":    trial.suggest_float("alpha", 1e-3, 1.0, log=True)
    }

study = optuna.create_study(direction="minimize")
study.optimize(
    lambda t: safe_cv_score(ESTIMATOR, search_space(t), X_train_n, y_train),
    n_trials=10, show_progress_bar=False
)
best = search_space(study.best_trial)

qr_model = Pipeline([
    ("sc", StandardScaler()),
    ("m",  ESTIMATOR(**best))
]).fit(X_train_n, y_train)

MODELS["quantile"]  = qr_model
METRICS["quantile"] = reg_metrics(y_valid, qr_model.predict(X_valid_n))
print("✓ QuantileRegressor", METRICS["quantile"])

[I 2025-05-27 23:48:35,699] A new study created in memory with name: no-name-ed65c811-1188-47d6-b264-4feb216fc779
[I 2025-05-27 23:48:35,854] Trial 0 finished with value: 1972.5110069971056 and parameters: {'alpha': 0.2114579706711702}. Best is trial 0 with value: 1972.5110069971056.
[I 2025-05-27 23:48:35,904] Trial 1 finished with value: 584.8020828869871 and parameters: {'alpha': 0.04887274238886442}. Best is trial 1 with value: 584.8020828869871.
[I 2025-05-27 23:48:35,946] Trial 2 finished with value: 1.1610091790027805e-11 and parameters: {'alpha': 0.0018638122121041778}. Best is trial 2 with value: 1.1610091790027805e-11.
[I 2025-05-27 23:48:35,998] Trial 3 finished with value: 1.8185349036673402e-11 and parameters: {'alpha': 0.005384192554635809}. Best is trial 2 with value: 1.1610091790027805e-11.
[I 2025-05-27 23:48:36,034] Trial 4 finished with value: 1972.5110069971056 and parameters: {'alpha': 0.2515191162066764}. Best is trial 2 with value: 1.1610091790027805e-11.
[I 2025

✓ QuantileRegressor {'mse': 7.320485368943791e-22, 'rmse': np.float64(2.7056395489687444e-11)}


In [None]:
# ╔══════  Model — LGBM (Quantile)  ══════╗
from lightgbm import LGBMRegressor as ESTIMATOR

def search_space(trial):
    return {
        "objective":     "quantile",
        "alpha":         trial.suggest_float("alpha", 0.1, 0.9),
        "n_estimators":  trial.suggest_int("n_est", 300, 900, 300),
        "learning_rate": trial.suggest_float("lr",   1e-2, 2e-1, log=True),
        "max_depth":     trial.suggest_int("max_d", 3,   8),
    }

study = optuna.create_study(direction="minimize")
study.optimize(
    lambda t: safe_cv_score(ESTIMATOR, search_space(t), X_train_n, y_train),
    n_trials=15, show_progress_bar=False
)
best = search_space(study.best_trial)

lgbq_model = Pipeline([
    ("sc", StandardScaler()),
    ("m",  ESTIMATOR(**best))
]).fit(X_train_n, y_train)

MODELS["lgb_quant"]  = lgbq_model
METRICS["lgb_quant"] = reg_metrics(y_valid, lgbq_model.predict(X_valid_n))
print("✓ LGBM Quantile", METRICS["lgb_quant"])

[I 2025-05-27 23:48:36,195] A new study created in memory with name: no-name-f6d9b0ad-fa0b-48f2-8f33-0b22d1e6ed7f


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000317 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -60.465622
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -20.458912
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000168 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 1

[I 2025-05-27 23:48:37,288] Trial 0 finished with value: 1968.6686509467736 and parameters: {'alpha': 0.518903380326407, 'n_est': 600, 'lr': 0.07737506411298326, 'max_d': 4}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000161 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -176.371994
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -317.197388
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start traini

[I 2025-05-27 23:48:39,118] Trial 1 finished with value: 2005.859107958374 and parameters: {'alpha': 0.44575013722390155, 'n_est': 900, 'lr': 0.015319435978074801, 'max_d': 6}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -849.924377
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -840.527771
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000137 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start traini

[I 2025-05-27 23:48:39,676] Trial 2 finished with value: 2107.418636637916 and parameters: {'alpha': 0.29679304400881296, 'n_est': 300, 'lr': 0.04487083427471647, 'max_d': 6}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score 708.953308
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 837.732361
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training

[I 2025-05-27 23:48:40,787] Trial 3 finished with value: 2131.48810186639 and parameters: {'alpha': 0.7575860650843332, 'n_est': 600, 'lr': 0.02022385031534193, 'max_d': 5}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -1291.728394
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -1438.692627
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start trai

[I 2025-05-27 23:48:42,518] Trial 4 finished with value: 2409.964967987339 and parameters: {'alpha': 0.14045579681625453, 'n_est': 900, 'lr': 0.18171648701724066, 'max_d': 6}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -808.834290
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -792.317139
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000127 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start traini

[I 2025-05-27 23:48:43,647] Trial 5 finished with value: 2073.2486336147563 and parameters: {'alpha': 0.31422913695081356, 'n_est': 600, 'lr': 0.0534145099306485, 'max_d': 6}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000181 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -1295.222656
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -1463.683960
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start trai

[I 2025-05-27 23:48:44,212] Trial 6 finished with value: 2387.620947293467 and parameters: {'alpha': 0.1332031290480807, 'n_est': 300, 'lr': 0.19271068540543007, 'max_d': 6}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000144 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -1303.456055
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -1494.831177
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start trai

[I 2025-05-27 23:48:45,634] Trial 7 finished with value: 2435.4556980071534 and parameters: {'alpha': 0.12411244169932543, 'n_est': 900, 'lr': 0.06172175221882317, 'max_d': 3}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000167 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score 903.446106
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 1293.163574
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start trainin

[I 2025-05-27 23:48:46,699] Trial 8 finished with value: 2197.7071106447647 and parameters: {'alpha': 0.8211231494806528, 'n_est': 600, 'lr': 0.07299047980003401, 'max_d': 4}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score -1180.079590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score -1313.574463
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000262 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start trai

[I 2025-05-27 23:48:47,274] Trial 9 finished with value: 2361.9969830565765 and parameters: {'alpha': 0.16202822585207716, 'n_est': 300, 'lr': 0.03002923149146807, 'max_d': 4}. Best is trial 0 with value: 1968.6686509467736.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000097 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score 233.974136
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 436.620178
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 1

[I 2025-05-27 23:48:48,456] Trial 10 finished with value: 1962.1199492613682 and parameters: {'alpha': 0.6325628084385941, 'n_est': 600, 'lr': 0.09703592662422474, 'max_d': 8}. Best is trial 10 with value: 1962.1199492613682.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score 269.176880
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 441.430756
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training

[I 2025-05-27 23:48:49,620] Trial 11 finished with value: 1958.1908740828467 and parameters: {'alpha': 0.6411148635283305, 'n_est': 600, 'lr': 0.10890433406954343, 'max_d': 8}. Best is trial 11 with value: 1958.1908740828467.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000161 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score 266.603058
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000161 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 440.310638
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training

[I 2025-05-27 23:48:50,870] Trial 12 finished with value: 1951.0872142610333 and parameters: {'alpha': 0.6405821978866775, 'n_est': 600, 'lr': 0.11412919215066505, 'max_d': 8}. Best is trial 12 with value: 1951.0872142610333.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000207 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score 385.827179
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 570.774780
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training

[I 2025-05-27 23:48:52,081] Trial 13 finished with value: 1955.1286610778964 and parameters: {'alpha': 0.6637102488501619, 'n_est': 600, 'lr': 0.14346612636206166, 'max_d': 8}. Best is trial 12 with value: 1951.0872142610333.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 424
[LightGBM] [Info] Number of data points in the train set: 56, number of used features: 21
[LightGBM] [Info] Start training from score 573.390625
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 21
[LightGBM] [Info] Start training from score 612.692810
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1172
[LightGBM] [Info] Number of data points in the train set: 162, number of used features: 21
[LightGBM] [Info] Start training

[I 2025-05-27 23:48:52,738] Trial 14 finished with value: 1970.7851340341977 and parameters: {'alpha': 0.6911486592883296, 'n_est': 300, 'lr': 0.1277504378833279, 'max_d': 8}. Best is trial 12 with value: 1951.0872142610333.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1910
[LightGBM] [Info] Number of data points in the train set: 268, number of used features: 22
[LightGBM] [Info] Start training from score 598.150391
✓ LGBM Quantile {'mse': 6310830.880423117, 'rmse': np.float64(2512.136716109041)}


In [None]:
!pip install keras-tcn



In [None]:
# ╔══════  Model — CNN-LSTM (Keras)  ══════╗
from tensorflow.keras import layers, models

n_feat = X_train_n.shape[1]

# 1) Scale & reshape
scaler_cnn = StandardScaler().fit(X_train_n)
Xtr_seq    = scaler_cnn.transform(X_train_n).reshape(-1, n_feat, 1)
Xv_seq     = scaler_cnn.transform(X_valid_n).reshape(-1, n_feat, 1)

# 2) Build & compile
cnn_lstm = models.Sequential([
    layers.Input((n_feat,1)),
    layers.Conv1D(32, 3, activation="relu", padding="causal"),
    layers.MaxPool1D(2),
    layers.LSTM(32),
    layers.Dense(1)
])
cnn_lstm.compile(optimizer="adam", loss="mse")

# 3) Train
cnn_lstm.fit(Xtr_seq, y_train, epochs=30, batch_size=64, verbose=0)

# 4) Predict & record
preds = cnn_lstm.predict(Xv_seq).flatten()

MODELS["cnn_lstm"]  = (scaler_cnn, cnn_lstm)   # tuple, not a Pipeline
METRICS["cnn_lstm"] = reg_metrics(y_valid, preds)
print("✓ CNN-LSTM", METRICS["cnn_lstm"])

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
✓ CNN-LSTM {'mse': 6604364.789555228, 'rmse': np.float64(2569.895871344835)}


In [None]:
# ╔══════  Model — KernelRidge  ══════╗
from sklearn.kernel_ridge import KernelRidge

# 1) Hyper-opt for alpha & kernel width
def krr_score(alpha, gamma):
    model = KernelRidge(alpha=alpha, kernel="rbf", gamma=gamma)
    return safe_cv_score(KernelRidge, {"alpha":alpha,"kernel":"rbf","gamma":gamma},
                         X_train_n, y_train)

# quick grid via Optuna
study = optuna.create_study(direction="minimize")
study.optimize(
    lambda t: safe_cv_score(KernelRidge,
                            {"alpha":t.suggest_float("alpha",1e-3,10,log=True),
                             "kernel":"rbf",
                             "gamma":t.suggest_float("gamma",1e-4,1,log=True)},
                            X_train_n, y_train),
    n_trials=15, show_progress_bar=False
)
best = study.best_trial.params

# 2) Fit final
kr_model = KernelRidge(**best).fit(X_train_n, y_train)

MODELS["krr"]  = kr_model
METRICS["krr"] = reg_metrics(y_valid, kr_model.predict(X_valid_n))
print("✓ KernelRidge", METRICS["krr"])

[I 2025-05-27 23:48:56,463] A new study created in memory with name: no-name-023a22b8-c7db-4aa9-90a0-bc2416b5d42a
[I 2025-05-27 23:48:56,498] Trial 0 finished with value: 1893.2771286489817 and parameters: {'alpha': 0.8591759056035563, 'gamma': 0.008055564216589541}. Best is trial 0 with value: 1893.2771286489817.
[I 2025-05-27 23:48:56,507] Trial 1 finished with value: 1315.8360151058305 and parameters: {'alpha': 0.008220250745497572, 'gamma': 0.0006689785706654685}. Best is trial 1 with value: 1315.8360151058305.
[I 2025-05-27 23:48:56,518] Trial 2 finished with value: 1926.000041265764 and parameters: {'alpha': 1.1420766146936918, 'gamma': 0.00289041816580354}. Best is trial 1 with value: 1315.8360151058305.
[I 2025-05-27 23:48:56,528] Trial 3 finished with value: 1962.5068815866466 and parameters: {'alpha': 0.6650312922966853, 'gamma': 0.355827455050104}. Best is trial 1 with value: 1315.8360151058305.
[I 2025-05-27 23:48:56,538] Trial 4 finished with value: 1482.0459181787598 and 

✓ KernelRidge {'mse': 9.95764665239198e-14, 'rmse': np.float64(3.1555739022231725e-07)}


In [None]:
# ╔══════  Model — GaussianProcessRegressor  ══════╗
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

# 1) Kernel & model
kernel = C(1.0, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-2, normalize_y=True)

# 2) Fit
gpr.fit(X_train_n, y_train)

# 3) Predict
preds = gpr.predict(X_valid_n)

MODELS["gpr"]  = gpr
METRICS["gpr"] = reg_metrics(y_valid, preds)
print("✓ GPR", METRICS["gpr"])

✓ GPR {'mse': 6747000.922510626, 'rmse': np.float64(2597.498974496549)}


In [None]:
# ╔══════  Model — ExtraTreesRegressor  ══════╗
from sklearn.ensemble import ExtraTreesRegressor

def search_space(t):
    return {
        "n_estimators": t.suggest_int("n_est",100,500,100),
        "max_depth":    t.suggest_int("max_d",  4, 12),
        "min_samples_leaf": t.suggest_int("min_leaf",1,5)
    }

study = optuna.create_study(direction="minimize")
study.optimize(
    lambda tr: safe_cv_score(ExtraTreesRegressor, search_space(tr),
                             X_train_n, y_train),
    n_trials=15, show_progress_bar=False
)
best = search_space(study.best_trial)

et_model = Pipeline([
    ("sc", StandardScaler()),
    ("m",  ExtraTreesRegressor(**best))
]).fit(X_train_n, y_train)

MODELS["et"]  = et_model
METRICS["et"] = reg_metrics(y_valid, et_model.predict(X_valid_n))
print("✓ ExtraTrees", METRICS["et"])

[I 2025-05-27 23:48:56,715] A new study created in memory with name: no-name-391c3a30-d5ee-4543-b51c-fe979ad9c9f3
[I 2025-05-27 23:48:57,264] Trial 0 finished with value: 1819.1355537685529 and parameters: {'n_est': 300, 'max_d': 6, 'min_leaf': 2}. Best is trial 0 with value: 1819.1355537685529.
[I 2025-05-27 23:48:57,739] Trial 1 finished with value: 1818.71541023101 and parameters: {'n_est': 300, 'max_d': 5, 'min_leaf': 2}. Best is trial 1 with value: 1818.71541023101.
[I 2025-05-27 23:48:58,460] Trial 2 finished with value: 1812.7783904100236 and parameters: {'n_est': 400, 'max_d': 9, 'min_leaf': 3}. Best is trial 2 with value: 1812.7783904100236.
[I 2025-05-27 23:48:58,775] Trial 3 finished with value: 1855.0973615658609 and parameters: {'n_est': 200, 'max_d': 6, 'min_leaf': 5}. Best is trial 2 with value: 1812.7783904100236.
[I 2025-05-27 23:48:59,515] Trial 4 finished with value: 1798.6752017323242 and parameters: {'n_est': 400, 'max_d': 8, 'min_leaf': 2}. Best is trial 4 with va

✓ ExtraTrees {'mse': 6667821.234610234, 'rmse': np.float64(2582.2124689130897)}


In [None]:
# ╔══════════  Summary & simple hybrids  ══════════╗
from IPython.display import display
import pandas as pd

def avg_wrap(m1, m2, w=0.5):
    class H(RegressorMixin, BaseEstimator):
        def __init__(self): self.m1, self.m2, self.w = clone(m1), clone(m2), w
        def fit(self, X, y): self.m1.fit(X, y); self.m2.fit(X, y); return self
        def predict(self, X): return self.w*self.m1.predict(X)+(1-self.w)*self.m2.predict(X)
    return H()

if {"xgb","lstm"}.issubset(MODELS):
    MODELS["hyb_xgb_lstm"] = avg_wrap(MODELS["xgb"], MODELS["lstm"], 0.6).fit(X_train_n, y_train)
    METRICS["hyb_xgb_lstm"] = reg_metrics(y_valid, MODELS["hyb_xgb_lstm"].predict(X_valid_n))

if {"cat","trf"}.issubset(MODELS):
    MODELS["hyb_cat_trf"]  = avg_wrap(MODELS["cat"], MODELS["trf"]).fit(X_train_n, y_train)
    METRICS["hyb_cat_trf"] = reg_metrics(y_valid, MODELS["hyb_cat_trf"].predict(X_valid_n))

summary = pd.DataFrame(METRICS).T.sort_values("rmse")
display(summary.style.format("{:.2f}"))

Unnamed: 0,mse,rmse,mae,r2
quantile,0.0,0.0,,
ransac,0.0,0.0,,
ols,0.0,0.0,0.0,1.0
krr,0.0,0.0,,
arimax,0.0,0.0,0.0,1.0
ridge,7088.17,84.19,46.33,1.0
lasso,578552.23,760.63,422.61,0.91
elastic,4570186.8,2137.8,1176.21,0.29
cat,5755352.75,2399.03,,
xgb,5784561.79,2405.11,,


In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  Master back‑test loop for *all* models in MODELS (up to 50+)        ║
# ║  Outputs: equity, return %, Sharpe, max drawdown, net P&L            ║
# ╚══════════════════════════════════════════════════════════════════════╝
import pandas as pd, numpy as np, inspect, tensorflow as tf, warnings
warnings.filterwarnings("ignore")

# ───────────────────────────────────────────────────────────────────────
# 0)  Preparation – drop stray 'time' column and keep numeric features
# ───────────────────────────────────────────────────────────────────────
bt_df = backtest_df.drop(columns=['time'], errors='ignore').copy()

# ───────────────────────────────────────────────────────────────────────
# 1) Helper: single‑row prediction for MANY model types
# ───────────────────────────────────────────────────────────────────────
def single_pred(model, scaler, row_np, row_seq):
    """
    model  : estimator or keras model
    scaler : StandardScaler or None
    row_np : shape (1, n_feats)      –> for MLP / sklearn models
    row_seq: shape (1, n_feats, 1)   –> for CNN/RNN/LSTM/TCN, etc.
    """
    # keras‑style -------------------------------------------------------
    if isinstance(model, tf.keras.Model):
        try:
            out = model.predict(row_seq, verbose=0)
        except ValueError:
            out = model.predict(row_np,  verbose=0)
        return float(out.reshape(-1)[0])

    # statsmodels ARIMA / SARIMA / ARIMAX ------------------------------
    if hasattr(model, "forecast"):
        try:
            return float(model.forecast(steps=1)[0])
        except Exception:
            pass

    # arch GARCH -------------------------------------------------------
    if "arch" in str(type(model)):
        return float(model.forecast(horizon=1, reindex=False).mean.iloc[-1, 0])

    # VARResults -------------------------------------------------------
    if hasattr(model, "y"):
        try:
            last_obs = model.y[-model.k_ar:]
            return float(model.forecast(last_obs, steps=1)[0,0])
        except Exception:
            pass

    # sklearn / xgboost / lightgbm / catboost --------------------------
    if hasattr(model, "predict"):
        return float(model.predict(row_np)[0])

    raise TypeError("Model type not recognised for on‑the‑fly prediction.")

# ───────────────────────────────────────────────────────────────────────
# 2) Patch Strategy that calls single_pred
# ───────────────────────────────────────────────────────────────────────
from backtesting import Backtest, Strategy
class AnyModelStrategy(Strategy):
    def init(self):
        self.feats  = self.feats_param
        self.model  = self.model_param
        self.scaler = self.scaler_param
        self.nf     = len(self.feats)

    def _pred(self):
        row_df   = self.data.df[self.feats].iloc[[-1]]
        row_np   = row_df.values.astype("float32")
        row_seq  = row_np.reshape(1, self.nf, 1)
        return single_pred(self.model, self.scaler, row_np, row_seq)

    def next(self):
        sig = np.sign(self._pred())
        if sig > 0 and not self.position.is_long:
            self.position.close(); self.buy()
        elif sig < 0 and not self.position.is_short:
            self.position.close(); self.sell()

# ───────────────────────────────────────────────────────────────────────
# 3) Back‑test each model in MODELS
# ───────────────────────────────────────────────────────────────────────
results = []
for name, obj in MODELS.items():
    print(f"▶ Back‑testing {name} …")
    # unwrap (scaler, model) tuple or use default scaler=None
    scaler, model = (obj if isinstance(obj, tuple) else (None, obj))

    # inject params into Strategy class variables
    AnyModelStrategy.feats_param  = numeric_feats
    AnyModelStrategy.model_param  = model
    AnyModelStrategy.scaler_param = scaler

    # Build Backtesting.py DataFrame with proper OHLC
    bt_data = (
        bt_df.assign(Open=bt_df['open'], High=bt_df['high'],
                     Low =bt_df['low'],  Close=bt_df['close'])
        .dropna(subset=['Open','High','Low','Close'])
    )

    try:
        bt = Backtest(bt_data, AnyModelStrategy,
                      cash=1e6, commission=0.001,
                      exclusive_orders=False)
        stat = bt.run()
        results.append({
            "model":        name,
            "Return [%]":   stat["Return [%]"],
            "Sharpe":       stat["Sharpe Ratio"],
            "Max DD [%]":   stat["Max. Drawdown [%]"],
            "Equity Final": stat["Equity Final [$]"],
            "PnL [$]":      stat["Equity Final [$]"] - stat["Equity Start [$]"],
            "Exposure [%]": stat["Exposure Time [%]"]
        })
    except Exception as e:
        print(f"  ⚠️  skipped {name} – {e}")

# ───────────────────────────────────────────────────────────────────────
# 4) Neat summary table
# ───────────────────────────────────────────────────────────────────────
summary = (pd.DataFrame(results)
             .set_index("model")
             .sort_values("Return [%]", ascending=False))

print("\n📊 Back‑test summary (PnL, Sharpe, DD, …)")
display(summary.style.format({
    "Return [%]": "{:.2f}%",
    "Sharpe": "{:.2f}",
    "Max DD [%]": "{:.2f}%",
    "Equity Final": "${:,.0f}",
    "PnL [$]": "${:,.0f}",
    "Exposure [%]": "{:.1f}%"
}))

In [None]:
# ╔════════════════════════════════════════════════════════════╗
# ║  Back‑testing helpers (Backtesting.py + ML models)         ║
# ╚════════════════════════════════════════════════════════════╝
import numpy as np
import pandas as pd
from backtesting import Backtest, Strategy
import tensorflow as tf                         # only needed if you use Keras nets
from sklearn.base import BaseEstimator, RegressorMixin

# ──────────────────────────────────────────────────────────────
# 1)  Wrapper so a (scaler , keras‑net) tuple behaves like an
#     sklearn estimator.  Use it for your cnn_lstm model.
# ──────────────────────────────────────────────────────────────
class CNNLSTMWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, scaler, net):
        self.scaler = scaler
        self.net    = net

    # optional ‑ keeps the sklearn API intact
    def fit(self, X, y):
        Xs = self.scaler.fit_transform(X).reshape(-1, X.shape[1], 1)
        self.net.fit(Xs, y, epochs=1, verbose=0)
        return self

    def predict(self, X):
        Xs = self.scaler.transform(X).reshape(-1, X.shape[1], 1)
        return self.net.predict(Xs, verbose=0).flatten()

# ... somewhere earlier you created
# scaler_cnn, cnn_lstm = (your scaler, your compiled Keras model)
# Replace the tuple entry in MODELS with the wrapped version:
# MODELS["cnn_lstm"] = CNNLSTMWrapper(scaler_cnn, cnn_lstm)


# ──────────────────────────────────────────────────────────────
# 2)  Generic back‑test runner
# ──────────────────────────────────────────────────────────────
def run_backtest(
    df_feat: pd.DataFrame,
    model,                       # any estimator with predict()
    features: list[str],         # column names used by the model
    window: int = 60,            # bars fed to RNN/CNN models
    cash: float = 1_000_000,
    commission: float = 0.001
):
    """
    df_feat   : dataframe with lower‑case 'open','high','low','close', plus feature cols
    model     : trained estimator (sklearn, keras, XGB, etc.)
    features  : list of feature column names
    """
    # 2‑a) Copy & rename OHLC so Backtesting.py sees what it expects
    bt_df = (
        df_feat
        .assign(Open  = df_feat['open'],
                High  = df_feat['high'],
                Low   = df_feat['low'],
                Close = df_feat['close'],
                Volume = df_feat.get('volume', np.nan))       # keep lowercase cols too
        .dropna(subset=['Open','High','Low','Close'])         # essential for bt engine
    )

    # 2‑b) Build Backtesting.py strategy on‑the‑fly
    class MLStrategy(Strategy):
        def init(self):
            self.model  = model
            self.window = window
            self.feats  = features

        # Single‑step prediction helper
        def _pred(self):
            # ---- Keras (CNN/LSTM) ----
            if isinstance(self.model, tf.keras.Model):
                if len(self.data) < self.window:
                    return 0.0                                    # not enough history yet
                x = self.data.df[self.feats].iloc[-self.window:].values
                return float(self.model.predict(x[np.newaxis], verbose=0)[0, 0])
            # ---- Everything else (XGB, RF, Linear, …) ----
            row = self.data.df[self.feats].iloc[-1].values.reshape(1, -1)
            return float(self.model.predict(row)[0])

        # Convert prediction → trading signal
        def next(self):
            sig = np.sign(self._pred())          # positive → long, negative → short
            if sig > 0 and not self.position.is_long:
                self.position.close(); self.buy()
            elif sig < 0 and not self.position.is_short:
                self.position.close(); self.sell()

    # 2‑c) Run and plot
    bt = Backtest(bt_df, MLStrategy,
                  cash=cash,
                  commission=commission,
                  exclusive_orders=False)

    stats = bt.run()
    # Uncomment to see the chart in‑notebook
    # bt.plot()
    return stats

In [None]:
# ╔══════════════════════════════════════╗
# ║  6.  Back‑testing wrapper strategy   ║
# ╚══════════════════════════════════════╝
class MLStrategy(Strategy):
    model_name = None          # filled by factory
    window     = 1             # 1 bar look‑ahead

    def init(self):
        self.model = MODELS[self.model_name]
        self.feats = FEATURES
        self._pred = self.I(self._model_pred, name="pred")

    def _model_pred(self):
        # last feature row → 2‑D array for sklearn
        row = self.data.df[self.feats].iloc[-1:].values
        return float(self.model.predict(row)[0])

    def next(self):
        pred = self._pred[-1]                 # predicted absolute change
        if pred > 0 and not self.position.is_long:
            self.position.close()
            self.buy()                        # long 1× cash
        elif pred < 0 and not self.position.is_short:
            self.position.close()
            self.sell()                       # short 1× cash

In [None]:
# ╔═════════════════════════════════════════════╗
# ║  8.  Compare PnL curves & risk metrics      ║
# ╚═════════════════════════════════════════════╝
equity_curves = pd.DataFrame({
    n: res["Equity Final [$]"].equity_curve for n, res in results.items()
}).fillna(method="ffill")

equity_curves.plot(figsize=(12,6), title="Strategy equity comparison")
plt.ylabel("Portfolio value ($)")
plt.show()

# Summary table
summary = pd.DataFrame({
    n: {
        "final_equity": res["Equity Final [$]"],
        "return_%":     (res["Equity Final [$]"] - 1_000_000)/1_000_000 * 100,
        "sharpe":       res["Sharpe Ratio"],
        "max_drawdown": res["Max Drawdown [%]"],
        **METRICS[n]    # mse, rmse, …
    }
    for n, res in results.items()
}).T.sort_values("return_%", ascending=False)

display(summary)

AttributeError: 'list' object has no attribute 'items'