## Run LightGBM model for comparison

### For running on a server: Need to pip install lightgbm

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Set working directory to '/n/groups/patel/shakson/aiready/'
import os
os.chdir("/home/shaksonisaac/CGM/mambatf/")

In [3]:
#LOAD Datasets
import pandas as pd
import io
from google.cloud import storage

_BUCKET_NAME = "cgmproject2025"

# Download dataset from GCS
client = storage.Client()
bucket = client.bucket(_BUCKET_NAME)
blob = bucket.blob('ai-ready/data/train_timeseries_meal.feather')
data_bytes = blob.download_as_bytes()
train = pd.read_feather(io.BytesIO(data_bytes))


# Download test set:
client = storage.Client()
bucket = client.bucket(_BUCKET_NAME)
blob = bucket.blob('ai-ready/data/test_timeseries_meal.feather')
data_bytes = blob.download_as_bytes()
test = pd.read_feather(io.BytesIO(data_bytes))

In [4]:
train.head()

Unnamed: 0,participant_id,ts,cgm_glucose,activity_steps,calories_value,heartrate,oxygen_saturation,respiration_rate,sleep_stage,stress_level,...,cgm_diff_lag_3,cgm_diff_lag_6,cgm_lagdiff_1_3,cgm_lagdiff_3_6,minute_of_day,tod_sin,tod_cos,cgm_rolling_mean,cgm_rolling_std,predmeal_flag
11,1023,2023-08-30 18:45:00+00:00,101.0,0.0,4.0,81.0,93.0,10.946,light,43.2,...,-1.0,-20.0,0.0,-19.0,55,0.237686,0.971342,118.166667,16.336425,0.0
12,1023,2023-08-30 18:50:00+00:00,94.0,0.0,4.0,77.0,93.0,14.588,light,-1.0,...,-11.0,-18.0,-4.0,-7.0,60,0.258819,0.965926,114.916667,16.983727,0.0
13,1023,2023-08-30 18:55:00+00:00,93.0,0.0,4.0,77.0,93.0,15.262,light,6.8,...,-9.0,-6.0,-8.0,3.0,65,0.279829,0.96005,111.25,16.52615,0.0
14,1023,2023-08-30 19:00:00+00:00,95.0,102.0,4.0,83.6,93.0,2.64,light,6.6,...,-6.0,-7.0,-8.0,-1.0,70,0.300706,0.953717,107.416667,14.164349,0.0
15,1023,2023-08-30 19:05:00+00:00,101.0,102.0,4.0,90.4,93.0,-1.0,light,-2.0,...,7.0,-4.0,1.0,-11.0,75,0.321439,0.94693,104.5,10.991732,0.0


In [5]:
# Load Data
import os
import sys

#from TFT_pytorch import log_memory, create_tft_dataloaders, TFT_train
from scripts.mamba288 import create_tft_dataloaders, TFT_train, save_tft_to_gcs, load_tft_from_gcs

  from .autonotebook import tqdm as notebook_tqdm
  warn(


In [None]:
# Rebuild the training dataset (same context_length, horizon, etc.)
#training, val_dataloader, train_dataloader, validation = create_tft_dataloaders(train, horizon=12, context_length=288, batchsize=32)

[2025-08-08 02:47:55.710099] Start of Dataloader Creation
GPU Mem allocated: 0.00 GB | reserved: 0.00 GB


In [8]:
# Variables for lightGBM training:
# static_categoricals = ["participant_id", "clinical_site", "study_group"]
# static_reals = ["age"]
# time_varying_known_categoricals = ["sleep_stage"]
# time_varying_known_reals = [
#     "ds", "minute_of_day", "tod_sin", "tod_cos", "activity_steps", "calories_value",
#     "heartrate", "oxygen_saturation", "respiration_rate", "stress_level", 'predmeal_flag',
# ]
# time_varying_unknown_reals = [
#     "cgm_glucose", "cgm_lag_1", "cgm_lag_3", "cgm_lag_6", "cgm_diff_lag_1", "cgm_diff_lag_3",
#     "cgm_diff_lag_6", "cgm_lagdiff_1_3", "cgm_lagdiff_3_6", "cgm_rolling_mean", "cgm_rolling_std",
# ]
# cut_off_date = train_df["ds"].max() - horizon
# training = TimeSeriesDataSet(
#     train_df[train_df["ds"] < cut_off_date],
#     time_idx="ds",
#     target="cgm_glucose",
#     group_ids=["participant_id"],

In [None]:
# Build LightGBM model:
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

# --- Categorical handling (match TFT static/known categoricals) ---
categorical_cols = ["participant_id", "clinical_site", "study_group", "sleep_stage"]
for df in (train, test):
    for c in categorical_cols:
        if c in df.columns:
            df[c] = df[c].astype("category")

# --- Feature selection (aligned with TFT) ---
features = [
    # Static
    "age", "participant_id", "clinical_site", "study_group",
    # Known
    "minute_of_day", "tod_sin", "tod_cos", "activity_steps", "calories_value",
    "heartrate", "oxygen_saturation", "respiration_rate", "stress_level", "predmeal_flag",
    "sleep_stage",
    # Lags and rolling
    "cgm_lag_1", "cgm_lag_3", "cgm_lag_6", "cgm_diff_lag_1", "cgm_diff_lag_3",
    "cgm_diff_lag_6", "cgm_lagdiff_1_3", "cgm_lagdiff_3_6", "cgm_rolling_mean", "cgm_rolling_std",
]
target = "cgm_glucose"

# --- Prepare data ---
X = train[features]
y = train[target]
# Drop rows with missing target
mask = y.notna()
X, y = X[mask], y[mask]

horizon = 12  # forecast horizon

# --- Time-based split: last `horizon` points per participant for validation ---
def time_series_split(df, group_col, time_col, horizon):
    train_idx = []
    val_idx = []
    for _, group in df.groupby(group_col):
        group = group.sort_values(time_col)
        if len(group) > horizon:
            train_idx.extend(group.index[:-horizon])
            val_idx.extend(group.index[-horizon:])
        else:
            train_idx.extend(group.index)
    return train_idx, val_idx

train_idx, val_idx = time_series_split(train.loc[mask], group_col="participant_id", time_col="ds", horizon=horizon)

X_train, y_train = X.loc[train_idx], y.loc[train_idx]
X_val, y_val     = X.loc[val_idx], y.loc[val_idx]

# --- LightGBM Datasets ---
lgb_train = lgb.Dataset(X_train, label=y_train, categorical_feature=[c for c in categorical_cols if c in X_train.columns])
lgb_val   = lgb.Dataset(X_val,   label=y_val,   reference=lgb_train, categorical_feature=[c for c in categorical_cols if c in X_val.columns])

# --- Train LightGBM ---
params = {
    "objective": "regression",
    "metric": "rmse",
    "boosting_type": "gbdt",
    "learning_rate": 0.05,
    "num_leaves": 31,
    "verbose": -1,
}

gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=1000,
    valid_sets=[lgb_train, lgb_val],
    valid_names=["train", "val"],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=100),
    ],
)

  for _, group in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	train's rmse: 0.629415	val's rmse: 0.543507
[200]	train's rmse: 0.301148	val's rmse: 0.387352
[300]	train's rmse: 0.2714	val's rmse: 0.34404
[400]	train's rmse: 0.255194	val's rmse: 0.314976
[500]	train's rmse: 0.24495	val's rmse: 0.307844
[600]	train's rmse: 0.236914	val's rmse: 0.296024
[700]	train's rmse: 0.229923	val's rmse: 0.282366
[800]	train's rmse: 0.22422	val's rmse: 0.275573
[900]	train's rmse: 0.219104	val's rmse: 0.263219
[1000]	train's rmse: 0.214627	val's rmse: 0.259112
Did not meet early stopping. Best iteration is:
[1000]	train's rmse: 0.214627	val's rmse: 0.259112


In [None]:
# Build LightGBM model:
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

# --- Categorical handling (match TFT static/known categoricals) ---
categorical_cols = ["participant_id", "clinical_site", "study_group", "sleep_stage"]
for df in (train, test):
    for c in categorical_cols:
        if c in df.columns:
            df[c] = df[c].astype("category")

# --- Feature selection (aligned with TFT) ---
features = [
    # Static
    "age", "participant_id", "clinical_site", "study_group",
    # Known
    "minute_of_day", "tod_sin", "tod_cos", "activity_steps", "calories_value",
    "heartrate", "oxygen_saturation", "respiration_rate", "stress_level", "predmeal_flag",
    "sleep_stage",
    # Lags and rolling
    "cgm_lag_1", "cgm_lag_3", "cgm_lag_6", "cgm_diff_lag_1", "cgm_diff_lag_3",
    "cgm_diff_lag_6", "cgm_lagdiff_1_3", "cgm_lagdiff_3_6", "cgm_rolling_mean", "cgm_rolling_std",
]
target = "cgm_glucose"

# --- Prepare data ---
X = train[features]
y = train[target]
# Drop rows with missing target
mask = y.notna()
X, y = X[mask], y[mask]

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

horizon = 12  # number of steps ahead
models = {}
val_rows = []   # collect per-horizon validation preds and targets

def smape(y_true, y_pred, eps=1e-8):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    denom = np.maximum(np.abs(y_true) + np.abs(y_pred), eps)
    return np.mean(2.0 * np.abs(y_pred - y_true) / denom) * 100.0

# Train 12 independent models, one for each horizon h=1..12
for h in range(1, horizon + 1):
    # Shift target by -h within each participant (predict y at t+h from features at t)
    y_h = train.groupby("participant_id")[target].shift(-h)
    ds_target_h = train.groupby("participant_id")["ds"].shift(-h)  # target timestamp for bookkeeping

    # Build time-based split so inputs map to the last `horizon` target timestamps per group
    train_idx_h, val_idx_h = [], []
    for pid, g in train.groupby("participant_id"):
        g = g.sort_values("ds")
        n = len(g)
        # validation inputs i where i+h points to the last `horizon` target times
        # i ∈ [n - horizon - h, n - h)
        start = n - horizon - h
        end = n - h
        if start is not None and start >= 0:
            val_ids = g.index[start:end]
            val_idx_h.extend(val_ids.tolist())
        # training inputs are those with available target (i+h < n) and not in validation
        train_ids = g.index[: max(n - h, 0)]
        if start is not None and start >= 0:
            mask_not_val = ~pd.Index(train_ids).isin(val_ids)
            train_ids = pd.Index(train_ids)[mask_not_val]
        train_idx_h.extend(train_ids.tolist())

    # Drop any rows without target
    train_idx_h = [i for i in train_idx_h if pd.notna(y_h.loc[i])]
    val_idx_h   = [i for i in val_idx_h   if pd.notna(y_h.loc[i])]

    X_train_h, y_train_h = X.loc[train_idx_h], y_h.loc[train_idx_h]
    X_val_h,   y_val_h   = X.loc[val_idx_h],   y_h.loc[val_idx_h]

    # LightGBM datasets (categoricals preserved)
    lgb_train_h = lgb.Dataset(
        X_train_h, label=y_train_h,
        categorical_feature=[c for c in categorical_cols if c in X_train_h.columns]
    )
    lgb_val_h = lgb.Dataset(
        X_val_h, label=y_val_h, reference=lgb_train_h,
        categorical_feature=[c for c in categorical_cols if c in X_val_h.columns]
    )

    params = {
        "objective": "regression",
        "metric": "rmse",
        "boosting_type": "gbdt",
        "learning_rate": 0.05,
        "num_leaves": 31,
        "verbose": -1,
    }

    gbm_h = lgb.train(
        params,
        lgb_train_h,
        num_boost_round=1000,
        valid_sets=[lgb_train_h, lgb_val_h],
        valid_names=["train", f"val_h{h}"],
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(period=100),
        ],
    )
    models[h] = gbm_h

    # Predict and collect metrics for this horizon
    y_pred_h = gbm_h.predict(X_val_h, num_iteration=gbm_h.best_iteration)
    rmse_h = root_mean_squared_error(y_val_h, y_pred_h)
    mae_h = mean_absolute_error(y_val_h, y_pred_h)
    smape_h = smape(y_val_h, y_pred_h)
    print(f"H{h:02d} | RMSE: {rmse_h:.4f} | MAE: {mae_h:.4f} | sMAPE: {smape_h:.2f}%")

    # Save per-row results for summary
    rows = pd.DataFrame({
        "participant_id": train.loc[val_idx_h, "participant_id"].values,
        "input_ds": train.loc[val_idx_h, "ds"].values,          # input timestamp t
        "target_ds": ds_target_h.loc[val_idx_h].values,         # target timestamp t+h
        "horizon": h,
        "y_true": y_val_h.values,
        "y_pred": y_pred_h,
    })
    val_rows.append(rows)

# Concatenate validation predictions for all horizons
val_forecasts = pd.concat(val_rows, ignore_index=True)

# Aggregate overall metrics across horizons
overall_rmse = root_mean_squared_error(val_forecasts["y_true"], val_forecasts["y_pred"])
overall_mae  = mean_absolute_error(val_forecasts["y_true"], val_forecasts["y_pred"])
overall_smape = smape(val_forecasts["y_true"], val_forecasts["y_pred"])

print(f"Overall (H1–H{horizon}) RMSE:  {overall_rmse:.4f}")
print(f"Overall (H1–H{horizon}) MAE:   {overall_mae:.4f}")
print(f"Overall (H1–H{horizon}) sMAPE: {overall_smape:.2f}%")

# val_forecasts holds per-horizon predictions for the last 12 target timestamps per participant.
# Columns: participant_id, input_ds (t), target_ds (t+h), horizon, y_true, y_pred
# ...existing code...

In [14]:
X.head()

Unnamed: 0,age,participant_id,clinical_site,study_group,minute_of_day,tod_sin,tod_cos,activity_steps,calories_value,heartrate,...,cgm_lag_1,cgm_lag_3,cgm_lag_6,cgm_diff_lag_1,cgm_diff_lag_3,cgm_diff_lag_6,cgm_lagdiff_1_3,cgm_lagdiff_3_6,cgm_rolling_mean,cgm_rolling_std
11,67,1023,UW,insulin_dependent,55,0.237686,0.971342,0.0,4.0,81.0,...,102.0,102.0,121.0,-1.0,-1.0,-20.0,0.0,-19.0,118.166667,16.336425
12,67,1023,UW,insulin_dependent,60,0.258819,0.965926,0.0,4.0,77.0,...,101.0,105.0,112.0,-7.0,-11.0,-18.0,-4.0,-7.0,114.916667,16.983727
13,67,1023,UW,insulin_dependent,65,0.279829,0.96005,0.0,4.0,77.0,...,94.0,102.0,99.0,-1.0,-9.0,-6.0,-8.0,3.0,111.25,16.52615
14,67,1023,UW,insulin_dependent,70,0.300706,0.953717,102.0,4.0,83.6,...,93.0,101.0,102.0,2.0,-6.0,-7.0,-8.0,-1.0,107.416667,14.164349
15,67,1023,UW,insulin_dependent,75,0.321439,0.94693,102.0,4.0,90.4,...,95.0,94.0,105.0,6.0,7.0,-4.0,1.0,-11.0,104.5,10.991732


In [13]:
# --- Evaluate ---
import numpy as np
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

def smape(y_true, y_pred, eps=1e-8):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    denom = np.maximum(np.abs(y_true) + np.abs(y_pred), eps)
    return np.mean(2.0 * np.abs(y_pred - y_true) / denom) * 100.0

y_pred = gbm.predict(X_val, num_iteration=gbm.best_iteration)
rmse = root_mean_squared_error(y_val, y_pred)
mae = mean_absolute_error(y_val, y_pred)
smape_val = smape(y_val, y_pred)

print(f"Validation RMSE:  {rmse:.4f}")
print(f"Validation MAE:   {mae:.4f}")
print(f"Validation sMAPE: {smape_val:.2f}%")

Validation RMSE:  0.2591
Validation MAE:   0.1197
Validation sMAPE: 0.09%


## Try version below:

## Quantile Loss Forecasting

In [102]:
import io
import pandas as pd
import numpy as np
import lightgbm as lgb
from google.cloud import storage  # or however you’ve set up _gcs_client
from sklearn.metrics import mean_pinball_loss

# =============================
# CONSTANTS
# =============================
LAGS = [1, 3, 6]
ROLLING_WINDOW = 6   # window size for rolling stats
HORIZON = 12         # forecast steps
QUANTILES = [0.2, 0.5, 0.8]

FEATURES = [
    "age", "participant_id", "clinical_site", "study_group",
    "minute_of_day", "tod_sin", "tod_cos", "activity_steps", "calories_value",
    "heartrate", "oxygen_saturation", "respiration_rate", "stress_level", "predmeal_flag",
    "sleep_stage",
    *[f"cgm_lag_{lag}"      for lag in LAGS],
    *[f"cgm_diff_lag_{lag}" for lag in LAGS],
    "cgm_lagdiff_1_3", "cgm_lagdiff_3_6",
    "cgm_rolling_mean", "cgm_rolling_std",
]

CATEGORICAL_COLS = ["participant_id", "clinical_site", "study_group", "sleep_stage"]

# =============================
# DATA LOADING
# =============================
def load_data_from_gcs(bucket_name: str, key: str) -> pd.DataFrame:
    bucket = _gcs_client.bucket(bucket_name)
    blob = bucket.blob(key)
    data_bytes = blob.download_as_bytes()
    return pd.read_feather(io.BytesIO(data_bytes))

# =============================
# FEATURE ENGINEERING
# =============================
def create_features(df: pd.DataFrame) -> pd.DataFrame:
    df = df.sort_values(["participant_id", "ds"])
    # lag / diff features
    for lag in LAGS:
        df[f"cgm_lag_{lag}"]      = df.groupby("participant_id")["cgm_glucose"].shift(lag)
        df[f"cgm_diff_lag_{lag}"] = df.groupby("participant_id")["cgm_glucose"].diff(lag)
    df["cgm_lagdiff_1_3"] = df["cgm_lag_1"] - df["cgm_lag_3"]
    df["cgm_lagdiff_3_6"] = df["cgm_lag_3"] - df["cgm_lag_6"]
    # rolling stats
    df["cgm_rolling_mean"] = (
        df.groupby("participant_id")["cgm_glucose"]
          .transform(lambda x: x.shift(1).rolling(ROLLING_WINDOW).mean())
    )
    df["cgm_rolling_std"] = (
        df.groupby("participant_id")["cgm_glucose"]
          .transform(lambda x: x.shift(1).rolling(ROLLING_WINDOW).std())
    )
    return df

# =============================
# TRAIN/VALID SPLIT (group‐specific)
# =============================
def time_series_split(df: pd.DataFrame, group_col: str, time_col: str, horizon: int):
    train_idx, val_idx = [], []
    for _, grp in df.groupby(group_col):
        grp = grp.sort_values(time_col)
        cutoff_grp = grp[time_col].max() - horizon
        train_grp = grp[grp[time_col] < cutoff_grp]
        val_grp   = grp[grp[time_col] >=  cutoff_grp]
        train_idx.extend(train_grp.index)
        val_idx.extend(val_grp.index)
    return train_idx, val_idx

# =============================
# DIRECT QUANTILE FORECASTING ON VALIDATION SPLIT
# =============================
# def train_and_evaluate_quantiles_on_val(train: pd.DataFrame):
#     # 1) Feature‐engineer
#     train_feat = create_features(train.copy())

#     # 2) Cast categoricals once
#     for c in CATEGORICAL_COLS:
#         if c in train_feat:
#             train_feat[c] = train_feat[c].astype("category")

#     models    = {}
#     all_preds = []

#     # 3) Loop over horizons
#     for h in range(1, HORIZON + 1):
#         # a) build horizon‐specific target
#         df_h = train_feat.copy()
#         df_h[f"target_h_{h}"] = (
#             df_h.groupby("participant_id")["cgm_glucose"]
#                 .shift(-h)
#         )
#         df_h = df_h.dropna(subset=FEATURES + [f"target_h_{h}"])

#         # b) split to train / val
#         train_idx, val_idx = time_series_split(df_h, "participant_id", "ds", HORIZON)
#         X_train = df_h.loc[train_idx, FEATURES]
#         y_train = df_h.loc[train_idx, f"target_h_{h}"]
#         X_val   = df_h.loc[val_idx,   FEATURES]
#         y_val   = df_h.loc[val_idx,   f"target_h_{h}"]

#         # Pick the last val row per participant for validation
#         # This ensures we predict the last available target for each participant
#         val_df_h = df_h.loc[val_idx].copy()
#         last_val_idx = val_df_h.groupby("participant_id").tail(1).index

#         X_val_origin = X_val.loc[last_val_idx]
#         y_val_origin = y_val.loc[last_val_idx]


#         # c) cast categories in splits
#         for df_split in (X_train, X_val):
#             for c in CATEGORICAL_COLS:
#                 if c in df_split:
#                     df_split[c] = df_split[c].astype("category")

#         # d) train one model per quantile
#         for q in QUANTILES:
#             params = {
#                 "objective":     "quantile",
#                 "alpha":         q,
#                 "metric":        "quantile",
#                 "boosting_type": "gbdt",
#                 "learning_rate": 0.05,
#                 "num_leaves":    31,
#                 "verbose":      -1,
#             }
#             gbm = lgb.train(
#                 params,
#                 lgb.Dataset(X_train, label=y_train, categorical_feature=CATEGORICAL_COLS),
#                 valid_sets=[lgb.Dataset(X_val, label=y_val, categorical_feature=CATEGORICAL_COLS)],
#                 valid_names=["val"],
#                 callbacks=[lgb.early_stopping(50), lgb.log_evaluation(100)],
#             )
#             models[(h, q)] = gbm

#             # e) predict on the VALIDATION set
#             y_pred = gbm.predict(X_val_origin)
#             actual = y_val_origin.values

#             # f) record preds & truths
#             df_val_pred = pd.DataFrame({
#                 "participant_id":   X_val_origin["participant_id"].values,
#                 "ds":               df_h.loc[last_val_idx, "ds"].values,
#                 "forecast_horizon": h,
#                 "quantile":         q,
#                 "pred_cgm":         y_pred,
#                 "actual_cgm":       actual,
#             })
#             all_preds.append(df_val_pred)

#     # 4) aggregate
#     val_forecast_df = pd.concat(all_preds, ignore_index=True)
#     return models, val_forecast_df

def train_and_evaluate_quantiles_on_val(train: pd.DataFrame):
    # 1) Feature‐engineer once
    df = create_features(train.copy())
    for c in CATEGORICAL_COLS:
        if c in df.columns:
            df[c] = df[c].astype("category")

    # 2) Carve out the LAST HORIZON points for each subject
    train_idx, val_idx = time_series_split(df, "participant_id", "ds", HORIZON)
    df_train = df.loc[train_idx]
    df_val   = df.loc[val_idx]

    # 3) Pick the FIRST row of the validation window as YOUR SINGLE ORIGIN
    origin_idx = df_val.groupby("participant_id").head(1).index
    X_origin   = df.loc[origin_idx, FEATURES]

    models    = {}
    all_preds = []

    # 4) Loop over each look-ahead
    for h in range(1, HORIZON + 1):
        # a) build horizon‐h target on the TRAIN+VAL frame
        df_h = df.copy()
        df_h[f"target_h_{h}"] = (
            df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
        )
        df_h = df_h.dropna(subset=FEATURES + [f"target_h_{h}"])

        # b) split again for EARLY-STOPPING
        tr_idx, va_idx = time_series_split(df_h, "participant_id", "ds", HORIZON)
        X_tr  = df_h.loc[tr_idx, FEATURES]
        y_tr  = df_h.loc[tr_idx, f"target_h_{h}"]
        X_val = df_h.loc[va_idx, FEATURES]
        y_val = df_h.loc[va_idx, f"target_h_{h}"]

        # c) cast cats on splits
        for X in (X_tr, X_val):
            for c in CATEGORICAL_COLS:
                if c in X:
                    X[c] = X[c].astype("category")

        # d) train one quantile model per q
        for q in QUANTILES:
            params = {
                "objective":     "quantile",
                "alpha":         q,
                "metric":        "quantile",
                "boosting_type": "gbdt",
                "learning_rate": 0.05,
                "num_leaves":    31,
                "verbose":      -1,
            }
            gbm = lgb.train(
                params,
                lgb.Dataset(X_tr,  label=y_tr,  categorical_feature=CATEGORICAL_COLS),
                valid_sets=[lgb.Dataset(X_val, label=y_val, categorical_feature=CATEGORICAL_COLS)],
                valid_names=["val"],
                callbacks=[lgb.early_stopping(50), lgb.log_evaluation(100)],
            )
            models[(h, q)] = gbm

            # e) PREDICT only from our SINGLE origin
            y_pred = gbm.predict(X_origin)

            # f) GRAB the true cgm at t+h for the same origin
            actual = (
                df.groupby("participant_id")["cgm_glucose"]
                  .shift(-h)
                  .loc[origin_idx]
                  .values
            )

            all_preds.append(pd.DataFrame({
                "participant_id":   df.loc[origin_idx, "participant_id"].values,
                "ds":               df.loc[origin_idx, "ds"].values,
                "forecast_horizon": h,
                "quantile":         q,
                "pred_cgm":         y_pred,
                "actual_cgm":       actual,
            }))

    forecast_df = pd.concat(all_preds, ignore_index=True)
    return models, forecast_df

# =============================
# USAGE EXAMPLE
# =============================
# train_df = load_data_from_gcs("my-bucket", "train.feather")
# models, val_forecast_df = train_and_evaluate_quantiles_on_val(train_df)
#
# # Compute pinball loss by horizon & quantile
# summary = (
#     val_forecast_df
#     .groupby(["forecast_horizon", "quantile"])
#     .apply(lambda df: mean_pinball_loss(df.actual_cgm, df.pred_cgm, alpha=df.quantile.iloc[0]))
#     .rename("pinball_loss")
# )
# print(summary)


In [103]:
# Takes 12 minutes
_BUCKET_NAME = "cgmproject2025"
_BASE_PREFIX = "models/predictions"
_gcs_client = storage.Client()
train = load_data_from_gcs(_BUCKET_NAME, 'ai-ready/data/train_timeseries_meal.feather')
test  = load_data_from_gcs(_BUCKET_NAME, 'ai-ready/data/test_timeseries_meal.feather')
models, forecast = train_and_evaluate_quantiles_on_val(train)

  for _, grp in df.groupby(group_col):
  origin_idx = df_val.groupby("participant_id").head(1).index
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 1.92988
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 1.92988


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 1.47495
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 1.47495


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 1.26073
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 1.26073


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 2.18091
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 2.18091


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 2.52649
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 2.52649


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 1.96879
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 1.96879


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 2.65781
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 2.65781


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.48102
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.48102


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 2.69871
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 2.69871


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.07381
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.07381


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.24243
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.24243


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.2737
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.2737


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.39943
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.39943


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.80855
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.80855


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.6851
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.6851


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.63969
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.63969


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 5.15809
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 5.15809


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.95819
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.95819


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.80278
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.80278


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 5.47298
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 5.47298


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.20318
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.20318


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 3.9271
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 3.9271


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 5.7139
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 5.7139


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.41048
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.41048


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.08444
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.08444


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 6.00831
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 6.00831


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.67211
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.67211


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.23575
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.23575


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 6.25647
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 6.25647


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.91209
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.91209


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.36288
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.36288


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 6.51632
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 6.51632


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 5.18306
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 5.18306


  df.groupby("participant_id")["cgm_glucose"]
  df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
  for _, grp in df.groupby(group_col):


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 4.46755
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 4.46755


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 6.76229
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 6.76229


  df.groupby("participant_id")["cgm_glucose"]


Training until validation scores don't improve for 50 rounds
[100]	val's quantile: 5.43953
Did not meet early stopping. Best iteration is:
[100]	val's quantile: 5.43953


  df.groupby("participant_id")["cgm_glucose"]


In [104]:
forecast

Unnamed: 0,participant_id,ds,forecast_horizon,quantile,pred_cgm,actual_cgm
0,1023,2831,1,0.2,137.355677,148.0
1,1024,2827,1,0.2,106.993882,108.0
2,1026,2549,1,0.2,156.378801,167.0
3,1027,2831,1,0.2,195.025989,283.0
4,1028,2829,1,0.2,118.965977,120.0
...,...,...,...,...,...,...
26671,7405,2831,12,0.8,169.602539,175.0
26672,7406,2831,12,0.8,182.644295,225.0
26673,7407,2831,12,0.8,167.345823,136.0
26674,7409,2831,12,0.8,165.537588,132.0


In [105]:
# Which participants does forecast have?
participants = forecast["participant_id"].unique()
print(f"Forecast contains {len(participants)} unique participants:")

Forecast contains 741 unique participants:


In [106]:
# Get forecasts of median quantile (0.5) for all horizons
median_forecast = forecast[forecast["quantile"] == 0.5]
median_forecast.head()

Unnamed: 0,participant_id,ds,forecast_horizon,quantile,pred_cgm,actual_cgm
741,1023,2831,1,0.5,139.604206,148.0
742,1024,2827,1,0.5,108.76891,108.0
743,1026,2549,1,0.5,163.38086,167.0
744,1027,2831,1,0.5,282.14148,283.0
745,1028,2829,1,0.5,121.344252,120.0


In [107]:
# Get the forecasts for a participant:
pid = participants[2]
pid_forecast = median_forecast[median_forecast["participant_id"] == pid]
pid_forecast

Unnamed: 0,participant_id,ds,forecast_horizon,quantile,pred_cgm,actual_cgm
743,1026,2549,1,0.5,163.38086,167.0
2966,1026,2549,2,0.5,166.512587,168.0
5189,1026,2549,3,0.5,165.233415,163.0
7412,1026,2549,4,0.5,164.569652,156.0
9635,1026,2549,5,0.5,161.164992,148.0
11858,1026,2549,6,0.5,157.370936,144.0
14081,1026,2549,7,0.5,151.52718,142.0
16304,1026,2549,8,0.5,146.03425,141.0
18527,1026,2549,9,0.5,144.423564,138.0
20750,1026,2549,10,0.5,142.090941,137.0


In [108]:
# Calculate MAE, SMAPE, RMSE for forecasts (Reconstruct vector 1-12 per participant)
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
def smape(y_true, y_pred):
    return 100 * np.mean(
        2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8)
    )
def quantile_loss(y_true, y_pred, q=0.5):
    return np.mean(np.maximum(q * (y_true - y_pred), (q - 1) * (y_true - y_pred)))
mae = mean_absolute_error(median_forecast["actual_cgm"], median_forecast["pred_cgm"])
rmse = root_mean_squared_error(median_forecast["actual_cgm"], median_forecast["pred_cgm"]) 
smape_val = smape(median_forecast["actual_cgm"], median_forecast["pred_cgm"])
quantile_val = quantile_loss(median_forecast["actual_cgm"], median_forecast["pred_cgm"], q=0.5)
print(f"MAE: {mae:.4f}, RMSE: {rmse:.4f}, sMAPE: {smape_val:.2f}, Quantile Loss: {quantile_val:.4f}")


MAE: 9.9052, RMSE: 16.0745, sMAPE: 7.30, Quantile Loss: 4.9526


In [109]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

# your helper functions
def smape(y_true, y_pred):
    return 100 * np.mean(
        2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-8)
    )

def quantile_loss(y_true, y_pred, q=0.5):
    return np.mean(
        np.maximum(q * (y_true - y_pred), (q - 1) * (y_true - y_pred))
    )

# assume val_forecast_df is your DF with columns
# ['participant_id','forecast_horizon','quantile','pred_cgm','actual_cgm']
median_forecast = forecast[forecast['quantile'] == 0.5]

# 1) find participants with a full 1–12 horizon vector
counts = (
    median_forecast
    .groupby('participant_id')['forecast_horizon']
    .nunique()
)
complete_ids = counts[counts == HORIZON].index

# 2) filter to only those participants
mf_full = median_forecast[median_forecast['participant_id'].isin(complete_ids)]

print(mf_full)

# 3) define a safe metrics function
def compute_metrics(df):
    # df should have exactly HORIZON rows
    df = df.sort_values('forecast_horizon')
    y_true = df['actual_cgm'].values
    y_pred = df['pred_cgm'].values
    return pd.Series({
        'MAE':            mean_absolute_error(y_true, y_pred),
        'RMSE':           root_mean_squared_error(y_true, y_pred),
        'sMAPE':          smape(y_true, y_pred),
        'Quantile_Loss':  quantile_loss(y_true, y_pred, q=0.5),
    })

# 4) group/apply
metrics_df = (
    mf_full
    .groupby('participant_id')
    .apply(compute_metrics)
    .reset_index()
)

print(metrics_df.head())



  .groupby('participant_id')['forecast_horizon']
  .groupby('participant_id')


      participant_id    ds  forecast_horizon  quantile    pred_cgm  actual_cgm
741             1023  2831                 1       0.5  139.604206       148.0
742             1024  2827                 1       0.5  108.768910       108.0
743             1026  2549                 1       0.5  163.380860       167.0
744             1027  2831                 1       0.5  282.141480       283.0
745             1028  2829                 1       0.5  121.344252       120.0
...              ...   ...               ...       ...         ...         ...
25930           7405  2831                12       0.5  161.709068       175.0
25931           7406  2831                12       0.5  158.552294       225.0
25932           7407  2831                12       0.5  133.761428       136.0
25933           7409  2831                12       0.5  137.842430       132.0
25934           7411  2601                12       0.5   87.629824        81.0

[8892 rows x 6 columns]
  participant_id        MAE

  .apply(compute_metrics)


In [110]:
metrics_df

# Compute average metrics across participants
overall_metrics = metrics_df.mean(numeric_only=True)
overall_metrics

MAE               9.905191
RMSE             11.612247
sMAPE             7.298937
Quantile_Loss     4.952596
dtype: float64

In [29]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# SMAPE helper
def smape(y_true, y_pred):
    denom = np.abs(y_true) + np.abs(y_pred)
    mask = denom != 0
    return 100 * np.mean(2 * np.abs(y_pred[mask] - y_true[mask]) / denom[mask])

# After calling your training function:
# models, forecast = train_and_direct_forecast(train, test)

# 1) Recreate categorical levels
categorical_cols = ["participant_id", "clinical_site", "study_group", "sleep_stage"]
train_feat = create_features(train.copy())
for c in categorical_cols:
    train_feat[c] = train_feat[c].astype("category")
cat_levels = {c: train_feat[c].cat.categories for c in categorical_cols}

# 2) Define FEATURES and HORIZON
FEATURES = [
    "age","participant_id","clinical_site","study_group",
    "minute_of_day","tod_sin","tod_cos","activity_steps","calories_value",
    "heartrate","oxygen_saturation","respiration_rate","stress_level","predmeal_flag",
    "sleep_stage",
] + [f"cgm_lag_{lag}" for lag in LAGS] + [f"cgm_diff_lag_{lag}" for lag in LAGS] + [
    "cgm_lagdiff_1_3","cgm_lagdiff_3_6","cgm_rolling_mean","cgm_rolling_std",
]
HORIZON = 12

# 3) Collect all true and predicted values from validation
all_true = []
all_pred = []
for h, model in models.items():
    df_h = create_features(train.copy())
    df_h["target_h"] = df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
    df_h = df_h.dropna(subset=FEATURES + ["target_h"])
    
    _, val_idx = time_series_split(df_h, "participant_id", "ds", HORIZON)
    X_val = df_h.loc[val_idx, FEATURES].copy()
    y_true = df_h.loc[val_idx, "target_h"].values
    
    for c, cats in cat_levels.items():
        X_val[c] = pd.Categorical(X_val[c], categories=cats)
    
    y_pred = model.predict(X_val)
    all_true.append(y_true)
    all_pred.append(y_pred)

# 4) Flatten and compute pooled metrics
y_true_all = np.concatenate(all_true)
y_pred_all = np.concatenate(all_pred)

mae_final   = mean_absolute_error(y_true_all, y_pred_all)
rmse_final  = np.sqrt(mean_squared_error(y_true_all, y_pred_all))
smape_final = smape(y_true_all, y_pred_all)

print(f"Final MAE:   {mae_final:.3f}")
print(f"Final RMSE:  {rmse_final:.3f}")
print(f"Final SMAPE: {smape_final:.2f}%")


Final MAE:   5.738
Final RMSE:  6.587
Final SMAPE: 4.23%


In [41]:
all_true

[array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([13

In [None]:

mean_absolute_error(all_true[5], all_pred[5])  # Example for first horizon

6.047237929188614

In [30]:
all_true

[array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([132., 132., 131., 131., 130., 129., 129., 128., 135., 146., 145.,
        145.]),
 array([13

In [31]:
all_pred

[array([134.77382528, 133.4189021 , 133.33526597, 132.37428059,
        132.5368327 , 131.57584732, 131.07391176, 131.18415089,
        130.22316551, 138.87583147, 150.2125759 , 142.15749683]),
 array([135.54047165, 134.1244634 , 133.66754693, 133.31522826,
        132.56151697, 132.75307763, 131.99936634, 131.47346478,
        131.64593656, 131.1942349 , 139.0044539 , 150.89203303]),
 array([135.30110587, 135.78093301, 135.30110587, 134.27619986,
        134.19349531, 134.19349531, 133.65850483, 133.65850483,
        133.32708349, 132.85005558, 132.85005558, 138.383407  ]),
 array([135.87091599, 135.87091599, 136.32109182, 135.87091599,
        135.02533554, 134.60532545, 134.60532545, 134.06841849,
        134.06841849, 134.06841849, 133.885947  , 133.56181035]),
 array([135.9738541 , 135.9738541 , 135.9738541 , 136.40847367,
        135.9738541 , 135.9738541 , 135.24393029, 135.24393029,
        134.76517433, 134.76517433, 134.12278638, 133.72761444]),
 array([135.31702578, 135.3170

In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.metrics import mean_absolute_error, mean_squared_error

# # 1) Re-build the exact category levels you used for training
# categorical_cols = ["participant_id", "clinical_site", "study_group", "sleep_stage"]
# train_feat = create_features(train.copy())
# for c in categorical_cols:
#     if c in train_feat.columns:
#         train_feat[c] = train_feat[c].astype("category")
# # capture those category levels
# cat_levels = {c: train_feat[c].cat.categories for c in categorical_cols}

# # 2) Paste in your FEATURES list and HORIZON
# FEATURES = [
#     "age","participant_id","clinical_site","study_group",
#     "minute_of_day","tod_sin","tod_cos","activity_steps","calories_value",
#     "heartrate","oxygen_saturation","respiration_rate","stress_level","predmeal_flag",
#     "sleep_stage",
#     *[f"cgm_lag_{lag}"       for lag in LAGS],
#     *[f"cgm_diff_lag_{lag}"  for lag in LAGS],
#     "cgm_lagdiff_1_3","cgm_lagdiff_3_6","cgm_rolling_mean","cgm_rolling_std",
# ]
# HORIZON = 12

# # 3) SMAPE helper
# def smape(y_true, y_pred):
#     denom = (np.abs(y_true) + np.abs(y_pred))
#     mask  = denom != 0
#     return 100 * np.mean(2 * np.abs(y_pred[mask] - y_true[mask]) / denom[mask])

# # 4) Loop over each horizon, recast cat dtypes, predict & compute
# rows = []
# all_true = []
# all_pred = []

# for h, model in models.items():
#     # rebuild df_h & shifted target
#     df_h = create_features(train.copy())
#     df_h["target_h"] = df_h.groupby("participant_id")["cgm_glucose"].shift(-h)
#     df_h = df_h.dropna(subset=FEATURES + ["target_h"])
    
#     # train/val indices
#     _, val_idx = time_series_split(df_h, "participant_id", "ds", HORIZON)
#     X_val = df_h.loc[val_idx, FEATURES].copy()
#     y_val = df_h.loc[val_idx, "target_h"].values
    
#     # **re-apply the exact same categories** to X_val
#     for c, cats in cat_levels.items():
#         if c in X_val.columns:
#             X_val[c] = pd.Categorical(X_val[c], categories=cats)
    
#     # predict & metrics
#     y_pred = model.predict(X_val)  # now no mismatch error
#     rows.append({
#         "horizon":    h,
#         "MAE":        mean_absolute_error(y_val, y_pred),
#         "RMSE":       np.sqrt(mean_squared_error(y_val, y_pred)),
#         "SMAPE (%)":  smape(y_val, y_pred),
#     })
#     all_true.append(y_val)
#     all_pred.append(y_pred)

# # 5) Build & print a DataFrame
# metrics_df = pd.DataFrame(rows).sort_values("horizon")
# mean_row = {
#     "horizon":   "mean",
#     "MAE":       metrics_df["MAE"].mean(),
#     "RMSE":      metrics_df["RMSE"].mean(),
#     "SMAPE (%)": metrics_df["SMAPE (%)"].mean(),
# }
# # turn it into a one-row DataFrame…
# mean_df = pd.DataFrame([mean_row])
# metrics_df = pd.concat([metrics_df, mean_df], ignore_index=True)
# print(metrics_df)

# # metrics_df = metrics_df.append(mean_row, ignore_index=True)
# # print(metrics_df)

# # 6) (Optional) overall flattened metrics
# y_true_all = np.concatenate(all_true)
# y_pred_all = np.concatenate(all_pred)
# overall = {
#     "MAE":       mean_absolute_error(y_true_all, y_pred_all),
#     "RMSE":      np.sqrt(mean_squared_error(y_true_all, y_pred_all)),
#     "SMAPE (%)": smape(y_true_all, y_pred_all),
# }
# print("\nOverall:", overall)

   horizon       MAE      RMSE  SMAPE (%)
0        1  3.185758  3.575176   2.336300
1        2  4.456879  5.570424   3.267372
2        3  5.662729  6.581692   4.173450
3        4  6.304471  7.097604   4.644702
4        5  6.532071  7.262997   4.812305
5        6  6.047238  6.873490   4.454516
6        7  5.999257  6.991233   4.419611
7        8  6.041967  6.828557   4.450451
8        9  6.407882  7.119867   4.720003
9       10  6.221349  6.874591   4.582491
10      11  6.080318  6.688815   4.478426
11      12  5.921345  6.706073   4.361216
12    mean  5.738439  6.514210   4.225070

Overall: {'MAE': 5.738438558837852, 'RMSE': np.float64(6.587063417320037), 'SMAPE (%)': np.float64(4.225070252173662)}


In [None]:
# # 2) attach the prediction timestamps
# #    assume `test_df` has your actual CGM series, with columns ['participant_id','ds','cgm_glucose']
# last_ds = (
#     test
#     .groupby("participant_id")["ds"]
#     .max()
#     .reset_index()
#     .rename(columns={"ds":"ds_orig"})
# )
# forecast_df = (
#     forecast
#     .merge(last_ds, on="participant_id")
#     .assign(ds_pred=lambda df: df["ds_orig"] + df["forecast_horizon"])
# )

# # 3) merge preds with the true values
# actuals = (
#     test
#     .rename(columns={"ds":"ds_pred","cgm_glucose":"actual"})
#     [["participant_id","ds_pred","actual"]]
# )
# eval_df = forecast_df.merge(actuals, on=["participant_id","ds_pred"], how="inner")
# print(eval_df)

# # 4) loop horizons and compute metrics
# def smape(y_true, y_pred):
#     num = np.abs(y_true - y_pred)
#     denom = (np.abs(y_true) + np.abs(y_pred))
#     mask = denom != 0
#     return 100*np.mean(2*num[mask] / denom[mask])

# for h in sorted(eval_df["forecast_horizon"].unique()):
#     dfh = eval_df[eval_df["forecast_horizon"]==h]
#     y_true = dfh["actual"]
#     y_pred = dfh["pred_cgm"]
#     mae  = mean_absolute_error(y_true, y_pred)
#     rmse = np.sqrt(mean_squared_error(y_true, y_pred))
#     sm   = smape(y_true.values, y_pred)
#     print(f"h={h:2d} →  MAE: {mae:.3f},  RMSE: {rmse:.3f},  SMAPE: {sm:.2f}%")

Empty DataFrame
Columns: [participant_id, forecast_horizon, pred_cgm, ds_orig, ds_pred, actual]
Index: []
