### Environment Setup

This experiment is implemented using PyTorch within a dedicated Conda virtual environment (`oran311`) based on Python 3.11.

The development environment includes:

- Python 3.11
- PyTorch 2.x (CPU version)
- NumPy
- Pandas
- XGBoost


The following cell verifies the installed PyTorch version and checks whether GPU acceleration (CUDA) is available.

Since the current setup uses the CPU-only version of PyTorch, CUDA support is not enabled.


In [1]:
import torch
print(torch.__version__)
print("CUDA available:", torch.cuda.is_available())

2.10.0+cpu
CUDA available: False


In [2]:
import sys
print(sys.executable)

!{sys.executable} -m pip install -U pip
!{sys.executable} -m pip install -U xgboost



c:\Users\10199\anaconda3\envs\oran311\python.exe
Collecting xgboost
  Using cached xgboost-3.2.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Using cached xgboost-3.2.0-py3-none-win_amd64.whl (101.7 MB)
Installing collected packages: xgboost
Successfully installed xgboost-3.2.0


In [2]:
import xgboost
from xgboost import XGBRegressor
print("xgboost version:", xgboost.__version__)


xgboost version: 3.2.0


## Model 3: Hybrid DNN–XGBoost (DNN Feature Extractor + XGBoost Regressor)

This model follows the hybrid pipeline described in the Cam-Ready paper.  
The key idea is to split the learning process into two stages:

1) A DNN is trained as a **feature extractor** to learn compact latent representations.  
2) The DNN is frozen, and a separate **XGBoost regressor** is trained on the extracted embeddings.

### DNN Feature Extractor Architecture
- Dense layers: 587 → 261 → 186 → 99
- Bottleneck embedding layer: 16 neurons
- Output head (for DNN training): 1 neuron (MSE loss)

### XGBoost Regressor (trained on embeddings)
- max_depth = 5
- n_estimators = 256
- learning_rate = 0.22


In [3]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# 1) load data
df = pd.read_csv("clean_ul_stage1.csv")

feature_cols = ["airtime", "selected_mcs", "txgain"]

target_col = "pm_power"

df = df.dropna(subset=feature_cols + [target_col]).copy()
for c in feature_cols + [target_col]:
    df[c] = pd.to_numeric(df[c], errors="coerce")
df = df.dropna(subset=feature_cols + [target_col]).copy()
df = df[df[target_col] > 0].copy() 

X = df[feature_cols].values
y = df[target_col].values


# 2) split: train/test then train/val
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val,  y_train, y_val  = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# 3) scale (fit ONLY on train)
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s   = scaler.transform(X_val)
X_test_s  = scaler.transform(X_test)

print("Shapes:", X_train_s.shape, X_val_s.shape, X_test_s.shape)


Shapes: (4153, 3) (462, 3) (1154, 3)


In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import mean_squared_error, mean_absolute_error

from xgboost import XGBRegressor

class HybridFeatureExtractor(nn.Module):
    """
    DNN feature extractor + a small regression head.
    We train this end-to-end first, then freeze and use the 16-dim embeddings for XGBoost.
    """
    def __init__(self, input_dim):
        super().__init__()

        self.feature_net = nn.Sequential(
            nn.Linear(input_dim, 587),
            nn.ReLU(),

            nn.Linear(587, 261),
            nn.ReLU(),

            nn.Linear(261, 186),
            nn.ReLU(),

            nn.Linear(186, 99),
            nn.ReLU(),

            nn.Linear(99, 16)   # bottleneck embeddings (paper uses 16)
        )

        # regression head for training the DNN stage
        self.reg_head = nn.Linear(16, 1)

    def forward(self, x):
        emb = self.feature_net(x)
        out = self.reg_head(emb)
        return out, emb

X_train_tensor = torch.FloatTensor(X_train_s)
X_test_tensor  = torch.FloatTensor(X_test_s)

y_train_tensor = torch.FloatTensor(y_train).view(-1, 1)
y_test_tensor  = torch.FloatTensor(y_test).view(-1, 1)

input_dim = X_train_s.shape[1]




In [5]:
model3_dnn = HybridFeatureExtractor(input_dim)

criterion = nn.MSELoss()
optimizer = optim.Adam(model3_dnn.parameters(), lr=0.001)

epochs = 200
batch_size = 64

best_val_mse = float("inf")

print("\nUL dataset (model3 - Hybrid DNN) training:")

X_val_tensor = torch.FloatTensor(X_val_s)
y_val_tensor = torch.FloatTensor(y_val).view(-1, 1)


for epoch in range(epochs):
    model3_dnn.train()

    perm = torch.randperm(X_train_tensor.size(0))

    for i in range(0, X_train_tensor.size(0), batch_size):
        idx = perm[i:i+batch_size]
        bx = X_train_tensor[idx]
        by = y_train_tensor[idx]

        optimizer.zero_grad()
        pred, _ = model3_dnn(bx)
        loss = criterion(pred, by)
        loss.backward()
        optimizer.step()

    # ---- epoch-end evaluation ----
    model3_dnn.eval()
    with torch.no_grad():
        train_pred, _ = model3_dnn(X_train_tensor)
        val_pred, _   = model3_dnn(X_val_tensor)

        train_mse = criterion(train_pred, y_train_tensor).item()
        val_mse   = criterion(val_pred, y_val_tensor).item()

    if val_mse < best_val_mse:
        best_val_mse = val_mse

    # same printing style as model1
    if (epoch == 0) or ((epoch + 1) % 10 == 0):
        print(f"Epoch {epoch+1:03d} | train MSE {train_mse:.6f} | val MSE {val_mse:.6f}")

print(f"Best val MSE: {best_val_mse}")

model3_dnn.eval()

with torch.no_grad():
    _, emb_train = model3_dnn(X_train_tensor)
    _, emb_test  = model3_dnn(X_test_tensor)

emb_train = emb_train.numpy()
emb_test  = emb_test.numpy()

print("Embeddings shape (train):", emb_train.shape)
print("Embeddings shape (test) :", emb_test.shape)



UL dataset (model3 - Hybrid DNN) training:
Epoch 001 | train MSE 2.250002 | val MSE 2.081907
Epoch 010 | train MSE 0.100358 | val MSE 0.089086
Epoch 020 | train MSE 0.092565 | val MSE 0.084168
Epoch 030 | train MSE 0.096473 | val MSE 0.086918
Epoch 040 | train MSE 0.105854 | val MSE 0.100533
Epoch 050 | train MSE 0.101289 | val MSE 0.091574
Epoch 060 | train MSE 0.088105 | val MSE 0.080541
Epoch 070 | train MSE 0.161307 | val MSE 0.159567
Epoch 080 | train MSE 0.099074 | val MSE 0.091948
Epoch 090 | train MSE 0.083177 | val MSE 0.077287
Epoch 100 | train MSE 0.103379 | val MSE 0.100173
Epoch 110 | train MSE 0.090587 | val MSE 0.087209
Epoch 120 | train MSE 0.124728 | val MSE 0.122327
Epoch 130 | train MSE 0.127948 | val MSE 0.126945
Epoch 140 | train MSE 0.101011 | val MSE 0.094028
Epoch 150 | train MSE 0.097970 | val MSE 0.095075
Epoch 160 | train MSE 0.102417 | val MSE 0.094853
Epoch 170 | train MSE 0.080623 | val MSE 0.076628
Epoch 180 | train MSE 0.095710 | val MSE 0.091870
Epoch 

In [6]:
xgb = XGBRegressor(
    max_depth=5,
    n_estimators=256,
    learning_rate=0.22,
    objective="reg:squarederror",
    random_state=42
)

xgb.fit(emb_train, y_train)

y_pred_xgb = xgb.predict(emb_test).reshape(-1)
y_true = np.asarray(y_test).reshape(-1)

def mean_relative_error(y_true, y_pred, eps=1e-9):
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)
    return float(np.mean(np.abs(y_true - y_pred) / (np.abs(y_true) + eps)) * 100)

mse  = mean_squared_error(y_true, y_pred_xgb)
rmse = float(np.sqrt(mse))
mae  = mean_absolute_error(y_true, y_pred_xgb)
mre  = mean_relative_error(y_true, y_pred_xgb)

print("\n=== Model 3: Hybrid DNN–XGBoost ===")
print("X:", feature_cols, " y:", target_col)
print(f"MSE  : {mse:.6f}")
print(f"RMSE : {rmse:.6f}")
print(f"MAE  : {mae:.6f}")
print(f"MRE% : {mre:.4f}")



=== Model 3: Hybrid DNN–XGBoost ===
X: ['airtime', 'selected_mcs', 'txgain']  y: pm_power
MSE  : 0.106532
RMSE : 0.326393
MAE  : 0.226258
MRE% : 1.9640


In [None]:
# ============================================================
# CELL 4 — 方案B版 Model3 (Slice-wise) ✅ COPY-PASTE RUNNABLE
# DNN Embedding + XGBoost(native ES, xgb.train)  | log1p target
#
# 方案B含义：
#   - 按主变量（txgain / selected_mcs / airtime）的每个取值切片(slice)
#   - 每个 slice 内随机 split: 80%train / 10%val(在train里) / 20%test
#   - 模型输入默认只用“条件特征”(traffic_load, BW, nRBs, clockspeed)
#   no feature engineering
# 依赖已在内存中存在：
#   - df (pd.DataFrame)
#   - FEATURE_SETS (dict)  # 你原来的那份也可以
#   - target_col (str)     # e.g. "pm_power" 
# ============================================================

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import xgboost as xgb

# ---------------------------
# 0) Seed
# ---------------------------
def set_seed(seed=42):
    import random
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# ---------------------------
# 1) Metrics
# ---------------------------
def mean_relative_error(y_true, y_pred, eps=1e-3):
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)
    return float(np.mean(np.abs(y_true - y_pred) / (np.abs(y_true) + eps)) * 100.0)

def compute_metrics(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = float(np.sqrt(mse))
    mae = mean_absolute_error(y_true, y_pred)
    mre = mean_relative_error(y_true, y_pred)
    return {"MSE": float(mse), "RMSE": rmse, "MAE": float(mae), "MRE(%)": float(mre)}

# ---------------------------
# 2) Clean numeric
# ---------------------------
def clean_numeric_df(df, cols_needed, target_col):
    d = df.dropna(subset=cols_needed).copy()
    for c in cols_needed:
        d[c] = pd.to_numeric(d[c], errors="coerce")
    d = d.dropna(subset=cols_needed).copy()
    d = d[d[target_col] > 0].copy()
    return d

# ---------------------------
# 3) Dataset
# ---------------------------
class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(np.asarray(X), dtype=torch.float32)
        self.y = torch.tensor(np.asarray(y), dtype=torch.float32).reshape(-1, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ---------------------------
# 4) DNN Feature Extractor
# ---------------------------
class HybridFeatureExtractor(nn.Module):
    def __init__(self, input_dim, emb_dim=16, dropout=0.2):
        super().__init__()
        self.feature_net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(dropout/2),

            nn.Linear(32, emb_dim),
        )
        self.reg_head = nn.Linear(emb_dim, 1)

    def forward(self, x):
        emb = self.feature_net(x)
        out = self.reg_head(emb)
        return out, emb

# ---------------------------
# 5) Train DNN + Extract Embeddings (log1p target)
# ---------------------------
def train_dnn_and_extract_embeddings(
    X_train_s, y_train_log,
    X_val_s,   y_val_log,
    X_test_s,  y_test_log,
    input_dim,
    emb_dim=128,
    epochs=100,
    batch_size=128,
    lr=1e-3,
    weight_decay=1e-4,
    patience=20,
    min_delta=1e-6,
    verbose_every=50,
    seed=42
):
    set_seed(seed)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    train_loader = DataLoader(TabularDataset(X_train_s, y_train_log), batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(TabularDataset(X_val_s,   y_val_log),   batch_size=batch_size, shuffle=False)
    test_loader  = DataLoader(TabularDataset(X_test_s,  y_test_log),  batch_size=batch_size, shuffle=False)

    model = HybridFeatureExtractor(input_dim=input_dim, emb_dim=emb_dim).to(device)

    loss_fn = nn.HuberLoss(delta=1.0)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=15)

    best_val = float("inf")
    best_state = None
    no_improve = 0

    for epoch in range(1, epochs + 1):
        model.train()
        train_sum = 0.0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            pred, _ = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
            optimizer.step()
            train_sum += loss.item() * len(xb)
        train_loss = train_sum / len(train_loader.dataset)

        model.eval()
        val_sum = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                pred, _ = model(xb)
                val_sum += loss_fn(pred, yb).item() * len(xb)
        val_loss = val_sum / len(val_loader.dataset)
        scheduler.step(val_loss)

        if val_loss < best_val - min_delta:
            best_val = val_loss
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            no_improve = 0
        else:
            no_improve += 1

        if epoch == 1 or epoch % verbose_every == 0:
            print(f"  DNN Epoch {epoch:03d} | train {train_loss:.6f} | val {val_loss:.6f} | no_improve={no_improve}")

        if no_improve >= patience:
            print(f"  DNN Early stop @ epoch {epoch} (patience={patience})")
            break

    if best_state is not None:
        model.load_state_dict(best_state)

    def extract_emb(loader):
        model.eval()
        embs, ys = [], []
        with torch.no_grad():
            for xb, yb in loader:
                xb = xb.to(device)
                _, emb = model(xb)
                embs.append(emb.cpu().numpy())
                ys.append(yb.numpy().reshape(-1))
        return np.vstack(embs), np.concatenate(ys)

    emb_train, y_train_log_1d = extract_emb(train_loader)
    emb_val,   y_val_log_1d   = extract_emb(val_loader)
    emb_test,  y_test_log_1d  = extract_emb(test_loader)

    return model, emb_train, y_train_log_1d, emb_val, y_val_log_1d, emb_test, y_test_log_1d

# ---------------------------
# 6) XGBoost native ES (xgb.train)
# ---------------------------
def train_xgb_native_early_stopping(
    emb_train, y_train_log,
    emb_val,   y_val_log,
    X_train_s=None,
    X_val_s=None,
    seed=42,
    params=None,
    num_boost_round=20000,
    early_stopping_rounds=200
):
    if params is None:
        params = {
            "objective": "reg:squarederror",
            "eval_metric": "rmse",
            "eta": 0.01,
            "max_depth": 5,
            "min_child_weight": 3,
            "subsample": 0.85,
            "colsample_bytree": 0.9,
            "lambda": 1.5,
            "alpha": 0.5,
            "gamma": 0.1,
            "tree_method": "hist",
            "seed": seed,
        }

    use_combined = (X_train_s is not None) and (X_val_s is not None)
    if use_combined:
        X_train = np.hstack([emb_train, X_train_s])
        X_val   = np.hstack([emb_val,   X_val_s])
    else:
        X_train = emb_train
        X_val   = emb_val

    dtrain = xgb.DMatrix(X_train, label=y_train_log)
    dval   = xgb.DMatrix(X_val,   label=y_val_log)

    booster = xgb.train(
        params=params,
        dtrain=dtrain,
        num_boost_round=500,
        evals=[(dtrain, "train"), (dval, "val")],
        early_stopping_rounds=early_stopping_rounds,
        verbose_eval=False
    )
    return booster, use_combined

# ---------------------------
# 7) One slice pipeline (方案B核心)
# ---------------------------
def train_eval_model3_one_slice(
    d_slice,
    feature_cols,
    target_col,
    seed=42,
    # split
    train_ratio=0.8,
    val_ratio_within_train=0.1,
    # DNN
    emb_dim=16,
    epochs=100,
    batch_size=128,
    lr=1e-3,
    weight_decay=1e-4,
    patience=20,
    min_delta=1e-6,
    verbose_every=50,
    # XGB
    xgb_params=None,
    num_boost_round=500,
    early_stopping_rounds=50,
    # whether to concat raw scaled features with embedding
    use_raw_plus_emb=True
):
    set_seed(seed)

    # split inside slice
    d_train, d_test = train_test_split(d_slice, test_size=(1 - train_ratio), random_state=seed)
    d_train, d_val  = train_test_split(d_train, test_size=val_ratio_within_train, random_state=seed)

    X_train = d_train[feature_cols].values
    X_val   = d_val[feature_cols].values
    X_test  = d_test[feature_cols].values

    y_train = d_train[target_col].values.astype(float)
    y_val   = d_val[target_col].values.astype(float)
    y_test  = d_test[target_col].values.astype(float)

    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_val_s   = scaler.transform(X_val)
    X_test_s  = scaler.transform(X_test)

    y_train_log = np.log1p(y_train)
    y_val_log   = np.log1p(y_val)
    y_test_log  = np.log1p(y_test)

    # DNN -> embeddings
    dnn, emb_train, y_train_log_1d, emb_val, y_val_log_1d, emb_test, y_test_log_1d = train_dnn_and_extract_embeddings(
        X_train_s, y_train_log,
        X_val_s,   y_val_log,
        X_test_s,  y_test_log,
        input_dim=len(feature_cols),
        emb_dim=emb_dim,
        epochs=epochs,
        batch_size=batch_size,
        lr=lr,
        weight_decay=weight_decay,
        patience=patience,
        min_delta=min_delta,
        verbose_every=verbose_every,
        seed=seed
    )

    # XGB
    booster, used_combined = train_xgb_native_early_stopping(
        emb_train, y_train_log_1d,
        emb_val,   y_val_log_1d,
        X_train_s=(X_train_s if use_raw_plus_emb else None),
        X_val_s=(X_val_s if use_raw_plus_emb else None),
        seed=seed,
        params=xgb_params,
        num_boost_round=num_boost_round,
        early_stopping_rounds=early_stopping_rounds
    )

    # test predict
    if used_combined:
        X_test_for_xgb = np.hstack([emb_test, X_test_s])
    else:
        X_test_for_xgb = emb_test

    dtest = xgb.DMatrix(X_test_for_xgb)
    y_pred_log = booster.predict(dtest, iteration_range=(0, booster.best_iteration + 1)).reshape(-1)

    y_pred = np.maximum(np.expm1(y_pred_log), 0.0)
    y_true = np.expm1(y_test_log_1d)

    metrics = compute_metrics(y_true, y_pred)

    rmse_log = float(np.sqrt(mean_squared_error(y_test_log_1d, y_pred_log)))
    approx_pct = float(np.expm1(rmse_log) * 100.0)

    return {
        "metrics": metrics,
        "rmse_log": rmse_log,
        "approx_pct": approx_pct,
        "n_train": len(d_train),
        "n_val": len(d_val),
        "n_test": len(d_test),

        "d_train": d_train.copy(),
        "d_val": d_val.copy(),
        "d_test": d_test.copy(),

        "dnn": dnn,
        "booster": booster,
        "scaler": scaler,
        "y_true": y_true,
        "y_pred": y_pred
    }

# ============================================================
# 8) 方案B：按 slice 逐个训练 + 汇总
# ============================================================

# 默认：thesis/方案B常用“条件特征”作为输入（主变量用来分 slice，不放进输入）
COND_FEATURES = ["traffic_load", "BW", "nRBs", "clockspeed"]
COND_FEATURES = [c for c in COND_FEATURES if c in df.columns]

# 你要模仿的“3个主变量实验”
EXPERIMENTS = {
    "gain": "txgain",
    "mcs": "selected_mcs",
    "airtime": "airtime",
}

# 如果你坚持“把主变量也作为输入特征”，改成 True（一般方案B不需要）
INCLUDE_SLICE_IN_INPUT = False

MIN_SLICE_SIZE = 30   # 每个 slice 至少多少样本才训练（太小会很不稳定）
SEED = 42

all_rows = []
test_outputs_m3B = {}      # 你后面画图可用
trained_models_m3B = {}    # 保存每个 slice 的模型

for exp_name, slice_col in EXPERIMENTS.items():
    if slice_col not in df.columns:
        print(f"[Skip] {exp_name}: slice_col '{slice_col}' not in df.columns")
        continue

    # 输入特征
    feature_cols = COND_FEATURES.copy()
    if INCLUDE_SLICE_IN_INPUT and slice_col not in feature_cols:
        feature_cols = [slice_col] + feature_cols

    cols_needed = feature_cols + [slice_col, target_col]
    d0 = clean_numeric_df(df, cols_needed, target_col)

    print("\n=====================================================")
    print(f"[Model3] Experiment: {exp_name} | slice_col={slice_col}")
    print(f"Input features: {feature_cols}")
    print("=====================================================")

    for sval, d_slice in d0.groupby(slice_col):
        if len(d_slice) < MIN_SLICE_SIZE:
            continue

        print(f"\n--- slice {slice_col}={sval} | n={len(d_slice)} ---")

        out = train_eval_model3_one_slice(
            d_slice=d_slice,
            feature_cols=feature_cols,
            target_col=target_col,
            seed=SEED,
            train_ratio=0.8,
            val_ratio_within_train=0.1,
            emb_dim=128,
            epochs=600,
            batch_size=128,
            lr=1e-3,
            weight_decay=1e-4,
            patience=60,
            min_delta=1e-6,
            verbose_every=100,
            xgb_params=None,
            num_boost_round=20000,
            early_stopping_rounds=200,
            use_raw_plus_emb=True
        )

        m = out["metrics"]

        all_rows.append({
            "experiment": exp_name,
            "slice_col": slice_col,
            "slice_value": sval,
            "features": ",".join(feature_cols),
            "MSE": m["MSE"],
            "RMSE": m["RMSE"],
            "MAE": m["MAE"],
            "MRE(%)": m["MRE(%)"],
            "RMSE_log": out["rmse_log"],
            "approx_rel_err(%)": out["approx_pct"],
            "n_train": out["n_train"],
            "n_val": out["n_val"],
            "n_test": out["n_test"],
            "n_slice": len(d_slice),
        })

        # 保存测试输出/模型（按 experiment + slice_value）
        test_outputs_m3B.setdefault(exp_name, {})
        test_outputs_m3B[exp_name][sval] = {
            "test_df": out["d_test"].copy(),
            "y_true": out["y_true"],
            "y_pred_m3B": out["y_pred"]
        }

        trained_models_m3B.setdefault(exp_name, {})
        trained_models_m3B[exp_name][sval] = {
            "dnn": out["dnn"],
            "booster": out["booster"],
            "scaler": out["scaler"],
            "feature_cols": feature_cols
        }

# per-slice results
results_slices_df = pd.DataFrame(all_rows)
print("\n===== Model3: per-slice Results =====")
display(results_slices_df.sort_values(["experiment", "slice_value"]))

# weighted summary per experiment (按 n_test 加权)
if len(results_slices_df) > 0:
    summary_rows = []
    for exp_name in results_slices_df["experiment"].unique():
        sub = results_slices_df[results_slices_df["experiment"] == exp_name].copy()
        w = sub["n_test"].values.astype(float)
        w = np.maximum(w, 1.0)
        def wavg(col):
            return float(np.sum(sub[col].values * w) / np.sum(w))
        summary_rows.append({
            "experiment": exp_name,
            "n_slices": int(len(sub)),
            "RMSE_wavg": wavg("RMSE"),
            "MAE_wavg":  wavg("MAE"),
            "MRE_wavg(%)": wavg("MRE(%)"),
            "MSE_wavg":  wavg("MSE"),
        })
    results_summary_df = pd.DataFrame(summary_rows).sort_values("MRE_wavg(%)")
    print("\n===== Model3:Summary (weighted by n_test) =====")
    display(results_summary_df)
else:
    print("\n[Warn] empty")
