In [2]:
# tft_selected15_train_eval_fixed.py
# ==========================================
# Temporal Fusion Transformer (TFT) for dataset_15
# - CPU only, dataset kecil (~4.9k rows)
# - Data loading & sanitation
# - Dataset construction (validation via min_prediction_idx)
# - Random Search + EarlyStopping (ringkas)
# - Bayesian Optimization (ringkas)
# - Evaluasi TEST hanya untuk BASE9; forecast ALL series
# - Metrik: MAE, RMSE, MSE, MAPE, sMAPE
# Output di: data\dataset_15\tft_runs
# ==========================================

import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import json
import numpy as np
import pandas as pd

import torch
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor
from lightning.pytorch.loggers import CSVLogger

from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

import optuna
from optuna.integration import PyTorchLightningPruningCallback

# ======================================================
# Paths
# ======================================================
BASE_DIR = Path(r"D:\Documents\Skripsi\demand-forecasting\data\dataset_15")
TFT_DATA = BASE_DIR / "tft_dataset_15.csv"              # dari pipeline selected-15

OUT_DIR  = BASE_DIR / "tft_runs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

seed_everything(42)

# ======================================================
# Config (FINAL, 4966 rows, CPU)
# ======================================================
ENC_LEN  = 24
PRED_LEN = 5

# Epoch & trials dibuat waras untuk CPU
MAX_EPOCHS_RS = 8        # random search
MAX_EPOCHS_BO = 12       # bayes opt
EARLY_PATIENCE = 3

N_RS = 2                 # random search trials
N_BO_TRIALS = 4          # bayes opt trials

BATCH_SIZE = 32          # batch kecil aman di CPU

# Evaluate only these 9 series on TEST, forecast for all
BASE9 = [
    ("02A","BUVW001KSW"), ("05A","BUVW001KSW"), ("13A","DOPQ001K002"),
    ("13I","BUVW001KSW"), ("14A","BUVW001KSW"), ("16C","DOPQ001K009"),
    ("17A","DOPQ001K002"), ("23A","BUVW001KSW"), ("29A","BUVW001KSW"),
]
BASE9_SET = set(BASE9)

DEVICE = "cpu"   # paksa CPU biar nggak drama

# ======================================================
# Helpers (metrics)
# ======================================================
def rmse(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    return float(np.sqrt(np.nanmean((a - b) ** 2))) if len(a) and len(b) else np.nan

def mse(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    return float(np.nanmean((a - b) ** 2)) if len(a) and len(b) else np.nan

def mae(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    return float(np.nanmean(np.abs(a - b))) if len(a) and len(b) else np.nan

def smape(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    denom = (np.abs(a) + np.abs(b)) / 2.0
    m = denom > 0
    return float(np.nanmean(np.abs(a[m] - b[m]) / denom[m])) * 100 if m.any() else np.nan

def mape(a, b, eps=1e-8):
    """
    MAPE (%). Periode dengan y_true ~ 0 di-skip supaya tidak blow up.
    """
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    mask = np.abs(a) > eps
    if not mask.any():
        return np.nan
    return float(np.nanmean(np.abs((a[mask] - b[mask]) / a[mask]))) * 100.0

def to_time_idx(df, col="periode"):
    df = df.copy()
    df[col] = pd.to_datetime(df[col]).dt.to_period("M").dt.to_timestamp()
    uniq = np.sort(df[col].unique())
    mapper = {ts: i for i, ts in enumerate(uniq)}
    df["time_idx"] = df[col].map(mapper)
    return df, mapper

def build_known_unknown_lists(df):
    base_known = ["event_flag","event_flag_lag1","holiday_count",
                  "holiday_count_lag1","rainfall_lag1","month","year","qtr"]
    known_reals = [c for c in base_known if c in df.columns]
    unknown_reals = ["qty"]
    return known_reals, unknown_reals

def make_datasets(df, enc_len=ENC_LEN, pred_len=PRED_LEN):
    train_mask = df["is_train"].eq(1)
    train_time_max = int(df.loc[train_mask, "time_idx"].max())
    val_start_tidx = int(train_time_max - pred_len + 1)

    static_cats  = [c for c in ["area","cabang","sku"] if c in df.columns]
    known_reals, unknown_reals = build_known_unknown_lists(df)
    weight_col = "sample_weight" if "sample_weight" in df.columns else None

    training = TimeSeriesDataSet(
        df[train_mask].copy(),
        time_idx="time_idx",
        target="qty",
        group_ids=["cabang","sku"],
        weight=weight_col,
        static_categoricals=static_cats,
        static_reals=[],
        time_varying_known_categoricals=[],
        time_varying_known_reals=known_reals,
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=unknown_reals,
        max_encoder_length=enc_len,
        min_encoder_length=3,
        max_prediction_length=pred_len,
        min_prediction_length=pred_len,
        add_relative_time_idx=True,
        add_encoder_length=True,
        target_normalizer=None,   # kalau mau, bisa diganti GroupNormalizer
        allow_missing_timesteps=True,
    )

    validation = TimeSeriesDataSet.from_dataset(
        training,
        df[train_mask].copy(),
        stop_randomization=True,
        min_prediction_idx=val_start_tidx
    )

    return training, validation, val_start_tidx

def last_k_decoder_rows(group, k=PRED_LEN):
    return group.tail(k)

# ======================================================
# Load data & Sanity-fix
# ======================================================
df = pd.read_csv(TFT_DATA, parse_dates=["periode"])
df = df.sort_values(["cabang","sku","periode"]).reset_index(drop=True)

# Remove rainfall current jika masih ada
if "rainfall" in df.columns:
    df = df.drop(columns=["rainfall"])

# Fill lag1 exog NaNs dengan 0
for c in ["event_flag_lag1","holiday_count_lag1","rainfall_lag1"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0)

# Ensure time columns
if "month" not in df.columns:
    df["month"] = pd.to_datetime(df["periode"]).dt.month
if "year" not in df.columns:
    df["year"]  = pd.to_datetime(df["periode"]).dt.year
if "qtr" not in df.columns:
    df["qtr"]   = pd.to_datetime(df["periode"]).dt.quarter

# time_idx
df, _mapper = to_time_idx(df, "periode")

# sample_weight default
if "sample_weight" not in df.columns:
    df["sample_weight"] = 1.0

# ======================================================
# Build datasets & dataloaders
# ======================================================
training, validation, val_start_tidx = make_datasets(df, ENC_LEN, PRED_LEN)
train_loader = training.to_dataloader(train=True,  batch_size=BATCH_SIZE, num_workers=0)
val_loader   = validation.to_dataloader(train=False, batch_size=BATCH_SIZE, num_workers=0)

# ======================================================
# Trainer builder (CPU, progress bar ON)
# ======================================================
def build_trainer(run_dir, patience=EARLY_PATIENCE, max_epochs=MAX_EPOCHS_RS):
    ckpt_cb = ModelCheckpoint(
        dirpath=run_dir / "checkpoints",
        filename="tft-{epoch:02d}-{val_loss:.4f}",
        monitor="val_loss",
        mode="min",
        save_top_k=1
    )
    es_cb = EarlyStopping(monitor="val_loss", mode="min", patience=patience)
    lr_cb = LearningRateMonitor(logging_interval="epoch")
    logger = CSVLogger(save_dir=run_dir, name="tft_logs")
    trainer = Trainer(
        max_epochs=max_epochs,
        accelerator="cpu",
        devices=1,
        logger=logger,
        callbacks=[ckpt_cb, es_cb, lr_cb],
        enable_progress_bar=True,
    )
    return trainer, ckpt_cb

# ======================================================
# Random Search
# ======================================================
def random_tft_params():
    # dataset kecil, model kecil saja
    hs = int(np.random.choice([8, 12, 16, 24]))
    ah = int(np.random.choice([1, 2]))
    hcont = int(np.random.choice([8, 12, 16]))
    drop = float(np.random.uniform(0.05, 0.30))
    lr = float(10 ** np.random.uniform(-4.0, -3.0))
    return {
        "hidden_size": int(hs),
        "attention_head_size": int(ah),
        "dropout": drop,
        "hidden_continuous_size": int(hcont),
        "learning_rate": lr,
    }

best_rs = {"val_loss": np.inf, "ckpt": None, "params": None}

for i in range(N_RS):
    run_dir = OUT_DIR / f"rs_trial_{i+1:02d}"
    run_dir.mkdir(parents=True, exist_ok=True)

    p = random_tft_params()
    model = TemporalFusionTransformer.from_dataset(
        training,
        loss=QuantileLoss(),
        hidden_size=int(p["hidden_size"]),
        attention_head_size=int(p["attention_head_size"]),
        dropout=float(p["dropout"]),
        hidden_continuous_size=int(p["hidden_continuous_size"]),
        learning_rate=float(p["learning_rate"]),
        output_size=7,
        log_interval=10,
        reduce_on_plateau_patience=3,
    )

    trainer, ckpt_cb = build_trainer(run_dir, patience=EARLY_PATIENCE, max_epochs=MAX_EPOCHS_RS)
    trainer.fit(model, train_loader, val_loader)

    best_path = ckpt_cb.best_model_path
    best_loss = ckpt_cb.best_model_score.item() if ckpt_cb.best_model_score is not None else np.inf
    if best_loss < best_rs["val_loss"]:
        best_rs = {"val_loss": best_loss, "ckpt": best_path, "params": p}

# ======================================================
# Bayesian Optimization with Optuna (refine)
# ======================================================
def objective(trial: optuna.Trial):
    run_dir = OUT_DIR / f"bo_trial_{trial.number:03d}"
    run_dir.mkdir(parents=True, exist_ok=True)

    hidden_size = int(trial.suggest_categorical("hidden_size", [8, 12, 16, 24, 32]))
    attn_heads  = int(trial.suggest_categorical("attention_head_size", [1, 2]))
    dropout     = float(trial.suggest_float("dropout", 0.05, 0.30, step=0.025))
    hcont       = int(trial.suggest_categorical("hidden_continuous_size", [8, 12, 16, 24]))
    lr          = float(trial.suggest_float("learning_rate", 1e-4, 5e-3, log=True))

    model = TemporalFusionTransformer.from_dataset(
        training,
        loss=QuantileLoss(),
        hidden_size=int(hidden_size),
        attention_head_size=int(attn_heads),
        dropout=float(dropout),
        hidden_continuous_size=int(hcont),
        learning_rate=float(lr),
        output_size=7,
        log_interval=10,
        reduce_on_plateau_patience=3,
    )

    ckpt_cb = ModelCheckpoint(
        dirpath=run_dir / "checkpoints",
        filename="tft-{epoch:02d}-{val_loss:.4f}",
        monitor="val_loss",
        mode="min",
        save_top_k=1
    )
    es_cb = EarlyStopping(monitor="val_loss", mode="min", patience=EARLY_PATIENCE)
    lr_cb = LearningRateMonitor(logging_interval="epoch")
    logger = CSVLogger(save_dir=run_dir, name="tft_logs")

    trainer = Trainer(
        max_epochs=MAX_EPOCHS_BO,
        accelerator="cpu",
        devices=1,
        logger=logger,
        callbacks=[ckpt_cb, es_cb, lr_cb, PyTorchLightningPruningCallback(trial, monitor="val_loss")],
        enable_progress_bar=True,
    )
    trainer.fit(model, train_loader, val_loader)

    best = ckpt_cb.best_model_score.item() if ckpt_cb.best_model_score is not None else np.inf
    trial.set_user_attr("ckpt_path", ckpt_cb.best_model_path)
    return best

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=N_BO_TRIALS, show_progress_bar=False)

best_trial = study.best_trial
best_ckpt  = best_trial.user_attrs.get("ckpt_path", None)
best_val   = best_trial.value

# Compare with Random Search
final_ckpt = best_ckpt
final_val  = best_val
final_src  = "bayes"
if best_rs["val_loss"] < final_val:
    final_ckpt = best_rs["ckpt"]
    final_val  = best_rs["val_loss"]
    final_src  = "random_search"

# Save summary tuning
summary = {
    "final_source": final_src,
    "final_val_loss": final_val,
    "final_ckpt": final_ckpt,
    "rs_best": best_rs,
    "bo_best": {"val_loss": best_val, "ckpt": best_ckpt, "params": best_trial.params},
}
with open(OUT_DIR / "tft_best_summary.json", "w", encoding="utf-8") as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)

# ======================================================
# Load best model
# ======================================================
best_model = TemporalFusionTransformer.load_from_checkpoint(final_ckpt)

# Trainer khusus predict
pred_trainer = Trainer(
    accelerator="cpu",
    devices=1,
    enable_progress_bar=False,
)

# ======================================================
# Predict TEST untuk BASE9 saja (eval)
# ======================================================
test_mask = df["is_test"].eq(1)
mask_base9 = df.apply(lambda r: (r["cabang"], r["sku"]) in BASE9_SET, axis=1)

predict_test_df = df[test_mask & mask_base9].copy()
predict_test = TimeSeriesDataSet.from_dataset(
    training,
    predict_test_df,
    stop_randomization=True,
    predict=True
)
pred_loader = predict_test.to_dataloader(train=False, batch_size=256, num_workers=0)

preds = pred_trainer.predict(best_model, dataloaders=pred_loader, return_predictions=True)
yhat = torch.cat([p for p in preds]).detach().cpu().numpy().reshape(-1)

# Build decoder keys untuk BASE9
dec_keys = (
    predict_test_df
    .groupby(["cabang","sku"], as_index=False)
    .apply(last_k_decoder_rows, k=PRED_LEN)
    .reset_index(drop=True)
)[["cabang","sku","time_idx","periode","qty"]]

m = min(len(dec_keys), len(yhat))
pred_test_out = dec_keys.head(m).copy()
pred_test_out["yhat"] = yhat[:m]

# Metrics per series BASE9
metrics_rows = []
for (cab, sku), g in pred_test_out.groupby(["cabang","sku"], sort=True):
    y_true = g["qty"].values
    y_pred = g["yhat"].values
    metrics_rows.append({
        "cabang": cab,
        "sku": sku,
        "MAE": mae(y_true, y_pred),
        "RMSE": rmse(y_true, y_pred),
        "MSE": mse(y_true, y_pred),
        "MAPE%": mape(y_true, y_pred),
        "sMAPE%": smape(y_true, y_pred),
    })
metrics_df = pd.DataFrame(metrics_rows).sort_values(["cabang","sku"])
metrics_df.to_csv(OUT_DIR / "tft_base9_test_metrics.csv", index=False)

# ======================================================
# Forecast ALL series di TEST window
# ======================================================
predict_all_df = df[df["is_test"].eq(1)].copy()
predict_all = TimeSeriesDataSet.from_dataset(
    training,
    predict_all_df,
    stop_randomization=True,
    predict=True
)
pred_all_loader = predict_all.to_dataloader(train=False, batch_size=256, num_workers=0)
preds_all = pred_trainer.predict(best_model, dataloaders=pred_all_loader, return_predictions=True)
yhat_all = torch.cat([p for p in preds_all]).detach().cpu().numpy().reshape(-1)

dec_keys_all = (
    predict_all_df
    .groupby(["cabang","sku"], as_index=False)
    .apply(last_k_decoder_rows, k=PRED_LEN)
    .reset_index(drop=True)
)[["cabang","sku","time_idx","periode","qty"]]

m2 = min(len(dec_keys_all), len(yhat_all))
forecast_all = dec_keys_all.head(m2).copy()
forecast_all["yhat"] = yhat_all[:m2]
forecast_all.to_csv(OUT_DIR / "tft_allseries_test_forecast.csv", index=False)

# ======================================================
# Save small summary run
# ======================================================
summary_rows = {
    "device": DEVICE,
    "encoder_length": ENC_LEN,
    "prediction_length": PRED_LEN,
    "final_source": final_src,
    "final_val_loss": final_val,
    "final_ckpt": final_ckpt,
    "num_train_batches": len(train_loader),
    "num_val_batches": len(val_loader),
    "num_test_rows_all": int(predict_all_df.shape[0]),
    "num_test_rows_base9": int(predict_test_df.shape[0]),
}
pd.DataFrame([summary_rows]).to_csv(OUT_DIR / "tft_run_summary.csv", index=False)

# Done


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 776    | train
3  | prescalers                         | ModuleDict                      | 176    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 884    | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 6.6 K  | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 5.9 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  |

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=8` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 482    | train
3  | prescalers                         | ModuleDict                      | 264    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 660    | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 5.2 K  | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 4.7 K  | train
7  | static_context_variable_selection  | GatedResid

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=8` reached.
[I 2025-11-13 02:59:42,971] A new study created in memory with name: no-name-80cf6e99-b54e-430c-801d-416f1c59c63a
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 776    | train
3  | prescalers                         | ModuleDict                      | 264    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 10.1 K | train
6  | decoder_variable_selecti

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=12` reached.
[I 2025-11-13 03:11:32,512] Trial 0 finished with value: 1204.6226806640625 and parameters: {'hidden_size': 16, 'attention_head_size': 2, 'dropout': 0.15000000000000002, 'hidden_continuous_size': 12, 'learning_rate': 0.0010450988923219415}. Best is trial 0 with value: 1204.6226806640625.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 776    | train
3  | prescalers                         | ModuleDict                      | 264    | train
4  | static_variable_selection     

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=12` reached.
[I 2025-11-13 03:24:10,330] Trial 1 finished with value: 1590.947021484375 and parameters: {'hidden_size': 24, 'attention_head_size': 1, 'dropout': 0.07500000000000001, 'hidden_continuous_size': 12, 'learning_rate': 0.00029501975411681477}. Best is trial 0 with value: 1204.6226806640625.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 776    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection     

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=12` reached.
[I 2025-11-13 03:36:20,149] Trial 2 finished with value: 1583.773681640625 and parameters: {'hidden_size': 24, 'attention_head_size': 2, 'dropout': 0.125, 'hidden_continuous_size': 24, 'learning_rate': 0.0003127650301546927}. Best is trial 0 with value: 1204.6226806640625.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 482    | train
3  | prescalers                         | ModuleDict                      | 352    | train
4  | static_variable_selection          | Variable

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=12` reached.
[I 2025-11-13 03:46:19,151] Trial 3 finished with value: 1652.7574462890625 and parameters: {'hidden_size': 8, 'attention_head_size': 2, 'dropout': 0.125, 'hidden_continuous_size': 16, 'learning_rate': 0.00014001131366257033}. Best is trial 0 with value: 1204.6226806640625.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


AssertionError: filters should not remove entries all entries - check encoder/decoder lengths and lags

cb yg baru

In [None]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import torch

from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor

from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.data.encoders import GroupNormalizer

import optuna

warnings.filterwarnings("ignore")

# =========================
# PATHS
# =========================
PROJECT_ROOT   = Path(r"D:\Documents\Skripsi\demand-forecasting")
DATASET15_DIR  = PROJECT_ROOT / "data" / "dataset_15"
OUT_DIR        = PROJECT_ROOT / "outputs" / "tft_selected15"
OUT_DIR.mkdir(parents=True, exist_ok=True)

DATA_PATH      = DATASET15_DIR / "tft_dataset_15_fullfeat.csv"

seed_everything(42)

# =========================
# LOAD & PREPARE DATA
# =========================
df = pd.read_csv(DATA_PATH, parse_dates=["periode"])
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

print("Rows TFT 15 fullfeat:", len(df))

# time_idx integer global
df = df.sort_values("periode")
df["time_idx"] = df["periode"].rank(method="dense").astype("int64")

# qty numeric, drop NaN
df["qty"] = pd.to_numeric(df["qty"], errors="coerce")
df = df.dropna(subset=["qty"]).reset_index(drop=True)

required_cols = [
    "area", "cabang", "sku", "periode", "qty",
    "is_train", "is_test", "sample_weight",
    "event_flag", "holiday_count", "rainfall_lag1",
]
missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(f"Kolom wajib hilang di TFT dataset: {missing}")

# =========================
# FEATURE SETUP
# =========================
static_categoricals = ["area", "cabang", "sku"]

known_reals = [
    "time_idx",
    "event_flag",
    "holiday_count",
    "rainfall_lag1",
]

lag_cols   = [c for c in df.columns if c.startswith("qty_lag")]
roll_cols  = [c for c in df.columns if c.startswith("qty_roll")]
extra_reals = []  # kalau nanti ada fitur tambahan

unknown_reals = ["qty"] + lag_cols + roll_cols + extra_reals

# isi NaN di known/unknown dengan 0 biar aman
for c in known_reals + unknown_reals:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0.0)

# pakai hanya baris train untuk fitting model
df_train = df.query("is_train == 1").copy()

# cek panjang seri
len_per_series = df_train.groupby(["cabang", "sku"]).size()
print("Min / median length train per series:",
      len_per_series.min(), len_per_series.median())

# TFT horizon = 5 bulan (sama kayak SARIMAX/LGBM)
max_prediction_length = 5

# encoder length jangan lebih dari min length
enc_max = 24
max_encoder_length = int(
    min(enc_max, max(6, len_per_series.min() - max_prediction_length))
)
min_encoder_length = max(6, max_encoder_length // 2)

print("min_encoder_length:", min_encoder_length)
print("max_encoder_length:", max_encoder_length)
print("max_prediction_length:", max_prediction_length)

# pastikan time_idx integer
df_train["time_idx"] = df_train["time_idx"].astype("int64")

# =========================
# BUILD GLOBAL TRAIN DATASET
# =========================
training = TimeSeriesDataSet(
    df_train,
    time_idx="time_idx",
    target="qty",
    group_ids=["cabang", "sku"],
    weight="sample_weight",
    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=static_categoricals,
    static_reals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=known_reals,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=unknown_reals,
    target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

train_loader = training.to_dataloader(
    train=True,
    batch_size=64,
    num_workers=0,
)

print("Training samples:", len(training))

# =========================
# OPTUNA OBJECTIVE
# =========================
def build_model(trial: optuna.Trial) -> TemporalFusionTransformer:
    hidden_size  = trial.suggest_int("hidden_size", 8, 64)
    hidden_cont  = trial.suggest_int("hidden_continuous_size", 8, 64)
    att_heads    = trial.suggest_int("attention_head_size", 1, 4)
    dropout      = trial.suggest_float("dropout", 0.05, 0.3)
    lr           = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    lstm_layers  = trial.suggest_int("lstm_layers", 1, 3)

    model = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=lr,
        hidden_size=hidden_size,
        attention_head_size=att_heads,
        dropout=dropout,
        hidden_continuous_size=hidden_cont,
        lstm_layers=lstm_layers,
        loss=QuantileLoss(),
        log_interval=50,
        log_val_interval=1,
        reduce_on_plateau_patience=3,
    )
    return model


def objective(trial: optuna.Trial) -> float:
    torch.cuda.empty_cache()

    model = build_model(trial)

    early_stop = EarlyStopping(
        monitor="train_loss",
        patience=5,
        mode="min",
        check_on_train_epoch_end=True,  # pakai loss train
    )
    lr_monitor = LearningRateMonitor(logging_interval="epoch")

    trainer = Trainer(
        max_epochs=50,
        accelerator="cpu",
        gradient_clip_val=1.0,
        callbacks=[early_stop, lr_monitor],
        enable_progress_bar=False,
    )

    trainer.fit(
        model,
        train_dataloaders=train_loader,
    )

    # ambil train_loss terakhir sebagai objective
    score = float(trainer.callback_metrics["train_loss"].item())
    return score


# =========================
# RUN STUDY
# =========================
print("Mulai Optuna TFT (global 15 SKU, no explicit val)...")
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20, show_progress_bar=False)

print("Best trial TFT:")
print("  train_loss:", study.best_trial.value)
print("  params    :", study.best_trial.params)

best_params = study.best_trial.params

# =========================
# TRAIN FINAL MODEL DENGAN PARAM TERBAIK
# =========================
final_model = build_model(study.best_trial)

early_stop_final = EarlyStopping(
    monitor="train_loss",
    patience=8,
    mode="min",
    check_on_train_epoch_end=True,
)
lr_monitor_final = LearningRateMonitor(logging_interval="epoch")

trainer_final = Trainer(
    max_epochs=60,
    accelerator="cpu",
    gradient_clip_val=1.0,
    callbacks=[early_stop_final, lr_monitor_final],
)

trainer_final.fit(
    final_model,
    train_dataloaders=train_loader,
)

print("Final TFT training selesai (global 15 SKU).")
print("Model siap dipakai untuk prediksi window test 5 bulan.")


Seed set to 42
[I 2025-11-13 17:34:07,785] A new study created in memory with name: no-name-ecf4daba-a995-4e75-a506-8a2dc8320db5


Rows TFT 15 fullfeat: 4965
Min / median length train per series: 41 41.0
min_encoder_length: 12
max_encoder_length: 24
max_prediction_length: 5
Training samples: 5400
Mulai Optuna TFT (global 15 SKU, no explicit val)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 776    | train
3  | prescalers                         | ModuleDict                      | 3.3 K  | train
4  | static_variable_selection          | VariableSelectionNetwork        | 35.4 K | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Best trial TFT:
  train_loss: 55.54610061645508
  params    : {'hidden_size': 57, 'hidden_continuous_size': 25, 'attention_head_size': 4, 'dropout': 0.16697316212768398, 'learning_rate': 0.0028010095636690877, 'lstm_layers': 1}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 776    | train
3  | prescalers                         | ModuleDict                      | 1.4 K  | train
4  | static_variable_selection          | VariableSelectionNetwork        | 15.1 K | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=60` reached.


Final TFT training selesai (global 15 SKU).
Model siap dipakai untuk prediksi window test 5 bulan.


In [25]:
print(dir(training))


['__add__', '__annotations__', '__class__', '__class_getitem__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__orig_bases__', '__parameters__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_categorical_encoders', '_check_params', '_check_tensors', '_collate_fn', '_constant_fill_strategy', '_construct_index', '_data_properties', '_data_to_tensors', '_get_auto_normalizer', '_get_lagged_names', '_group_ids', '_group_ids_mapping', '_lags', '_overwrite_values', '_preprocess_data', '_scalers', '_set_lagged_variables', '_set_target_normalizer', '_static_categoricals', '_static_reals', '_time_varying_known_categoricals', '_time_varying_known_reals', '_time_varying_unknown_categoricals', '_time_varying_unknown_rea

In [31]:
print("TYPE:", type(pred_output))
print("LENGTH:", len(pred_output) if hasattr(pred_output, "__len__") else "NO_LEN")

print("\nELEMENT TYPES:")
if hasattr(pred_output, "__len__"):
    for i, item in enumerate(pred_output):
        print(f"[{i}] type:", type(item))
        if hasattr(item, "shape"):
            print("    shape:", item.shape)
        if isinstance(item, list):
            print("    list_len:", len(item))
        if isinstance(item, dict):
            print("    dict_keys:", item.keys())
        print()


TYPE: <class 'pytorch_forecasting.models.base._base_model.Prediction'>
LENGTH: 5

ELEMENT TYPES:
[0] type: <class 'torch.Tensor'>
    shape: torch.Size([120, 5])

[1] type: <class 'NoneType'>

[2] type: <class 'pandas.core.frame.DataFrame'>
    shape: (120, 3)

[3] type: <class 'NoneType'>

[4] type: <class 'NoneType'>



In [32]:
# ============================================================
# pred_output = Prediction object (5 elemen)
# ============================================================

print("Parsing Prediction objectâ€¦")

pred_tensor = pred_output[0]    # tensor [N, H]
idx_df      = pred_output[2]    # DataFrame: cabang, sku, time_idx

print("Pred tensor:", type(pred_tensor))
print("Index DF shape:", idx_df.shape)

# convert tensor ke numpy
if isinstance(pred_tensor, torch.Tensor):
    y_np = pred_tensor.detach().cpu().numpy()
else:
    y_np = np.asarray(pred_tensor)

# handle 3D (rare)
if y_np.ndim == 3 and y_np.shape[-1] == 1:
    y_np = y_np[...,0]

n_samples, horizon = y_np.shape
print("Parsed pred shape:", y_np.shape)


Parsing Prediction objectâ€¦
Pred tensor: <class 'torch.Tensor'>
Index DF shape: (120, 3)
Parsed pred shape: (120, 5)


In [34]:
# ============================================================
# 6) MAP PREDIKSI KE BULAN TEST (is_test=1)
# ============================================================

rows = []

df_sorted = df_full.sort_values(
    ["cabang", "sku", "periode"]
).reset_index(drop=True)

for i in range(n_samples):
    cabang = idx_df.iloc[i]["cabang"]
    sku    = idx_df.iloc[i]["sku"]

    sub = df_sorted[
        (df_sorted["cabang"] == cabang) &
        (df_sorted["sku"] == sku)
    ].sort_values("periode").reset_index(drop=True)

    sub_test = sub[sub["is_test"] == 1].reset_index(drop=True)
    if sub_test.empty:
        continue

    preds = y_np[i]
    n_step = min(len(preds), len(sub_test), max_prediction_length)

    for h in range(n_step):
        r = sub_test.loc[h]
        rows.append({
            "cabang": cabang,
            "sku": sku,
            "periode": r["periode"],
            "qty_actual": float(r["qty"]),
            "y_pred": float(preds[h]),
            "horizon": h + 1,
        })

pred_df = pd.DataFrame(rows)
print("Total pred rows:", len(pred_df))
print(pred_df.head())


Total pred rows: 45
  cabang         sku    periode  qty_actual       y_pred  horizon
0    02A  BUVW001KSW 2024-06-01      2306.0  1747.093506        1
1    02A  BUVW001KSW 2024-07-01      2398.0  5472.687500        2
2    02A  BUVW001KSW 2024-08-01      1515.0  3544.433594        3
3    02A  BUVW001KSW 2024-09-01      5738.0  3693.134521        4
4    02A  BUVW001KSW 2024-10-01      1304.0  1592.668945        5


In [35]:
# ============================================================
# 7) METRIC
# ============================================================

def smape(a, f):
    a = np.asarray(a)
    f = np.asarray(f)
    denom = np.abs(a) + np.abs(f)
    return np.where(denom == 0, 0, 200 * np.abs(a - f) / denom)

metrics = []

for (cabang, sku), g in pred_df.groupby(["cabang", "sku"]):
    y = g["qty_actual"].values
    p = g["y_pred"].values
    err = p - y
    mae = np.mean(np.abs(err))
    rmse = np.sqrt(np.mean(err ** 2))
    mape = np.mean(np.where(y == 0, 0, np.abs(err) / np.abs(y))) * 100
    smp = np.mean(smape(y, p))

    metrics.append({
        "cabang": cabang,
        "sku": sku,
        "n_test": len(g),
        "RMSE_test": rmse,
        "MAE_test": mae,
        "MAPE%_test": mape,
        "sMAPE%_test": smp,
    })

metrics_df = pd.DataFrame(metrics)
print(metrics_df.head())


  cabang          sku  n_test    RMSE_test     MAE_test  MAPE%_test  \
0    02A   BUVW001KSW       5  1905.228857  1599.312402   68.837267   
1    05A   BUVW001KSW       5   231.641400   215.570410    5.062114   
2    13A  DOPQ001K002       5  1251.720550  1175.325928   40.766332   
3    13I   BUVW001KSW       5  9798.380908  7116.705371  107.397682   
4    14A   BUVW001KSW       5  2425.467525  1592.995508   32.088890   

   sMAPE%_test  
0    49.845666  
1     4.958109  
2    33.628851  
3    71.860727  
4    37.034215  


In [36]:
# ============================================================
# 8) SAVE
# ============================================================

pred_path = OUT_DIR / "tft_selected15_predictions.csv"
met_path  = OUT_DIR / "tft_selected15_metrics.csv"

pred_df.to_csv(pred_path, index=False)
metrics_df.to_csv(met_path, index=False)

print("\nSaved:")
print("  Predictions:", pred_path)
print("  Metrics    :", met_path)



Saved:
  Predictions: D:\Documents\Skripsi\demand-forecasting\outputs\tft_selected15\tft_selected15_predictions.csv
  Metrics    : D:\Documents\Skripsi\demand-forecasting\outputs\tft_selected15\tft_selected15_metrics.csv


cb lagi yang pake optuna dll

In [74]:
import pandas as pd
import numpy as np
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer

from pathlib import Path

PROJECT_ROOT   = Path(r"D:\Documents\Skripsi\demand-forecasting")
DATASET15_DIR  = PROJECT_ROOT / "data" / "dataset_15"

# 1) LOAD DATA TFT 120 SERI
df = pd.read_csv(
    DATASET15_DIR / "tft_dataset_15_fullfeat.csv",
    parse_dates=["periode"]
)

# sort & time_idx per (cabang, sku)
df = df.sort_values(["cabang","sku","periode"]).reset_index(drop=True)
df["time_idx"] = df.groupby(["cabang","sku"]).cumcount()

# 2) BERSIHKAN NA (WAJIB UNTUK TFT)

rolling_cols = [
    "qty_rollmean_3","qty_rollstd_3",
    "qty_rollmean_6","qty_rollstd_6",
    "qty_rollmean_12","qty_rollstd_12",
]
rolling_cols = [c for c in rolling_cols if c in df.columns]

lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

# rolling: bfill/ffill per seri
if rolling_cols:
    df[rolling_cols] = (
        df.groupby(["cabang","sku"])[rolling_cols]
          .transform(lambda g: g.bfill().ffill())
    )

# lag qty: NaN -> 0
if lag_cols:
    df[lag_cols] = df[lag_cols].fillna(0)

# lag exog: NaN -> 0
for col in ["event_flag_lag1","holiday_count_lag1","rainfall_lag1"]:
    if col in df.columns:
        df[col] = df[col].fillna(0)

# sanity: cek sisa NA
na_left = df.isna().sum()
print("Sisa NA:")
print(na_left[na_left > 0])


Sisa NA:
Series([], dtype: int64)


In [75]:
train_df = df[df["is_train"] == 1].copy()

print("Baris train:", len(train_df))
print("Range time_idx train:", train_df["time_idx"].min(), "->", train_df["time_idx"].max())

# misal 6 bulan terakhir jadi validasi
training_cutoff = train_df["time_idx"].max() - 6
print("training_cutoff:", training_cutoff)

static_cat = ["cabang","sku"]

known_reals = [
    "time_idx",
    "event_flag","event_flag_lag1",
    "holiday_count","holiday_count_lag1",
    "rainfall_lag1",
]

unknown_reals = ["qty"] + rolling_cols + lag_cols

max_prediction_length = 1
min_encoder_length = 12
max_encoder_length = 24

training_ds = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="qty",
    group_ids=["cabang","sku"],
    weight="sample_weight",     # kamu sudah punya kolom ini

    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,

    static_categoricals=static_cat,
    static_reals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=known_reals,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=unknown_reals,

    target_normalizer=GroupNormalizer(groups=["cabang","sku"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,

    # ini yang bagi TRAIN / VAL internal
    min_prediction_idx=training_cutoff + 1,
)

batch_size = 64

train_loader = training_ds.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_loader = training_ds.to_dataloader(
    train=False,
    batch_size=batch_size,
    num_workers=0,
)

print("Jumlah sample train:", len(train_loader.dataset))
print("Jumlah sample val  :", len(val_loader.dataset))


Baris train: 4920
Range time_idx train: 0 -> 40
training_cutoff: 34
Jumlah sample train: 2160
Jumlah sample val  : 2160


In [76]:
from pytorch_forecasting.metrics import RMSE
from pytorch_forecasting import TemporalFusionTransformer
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping
import random

seed_everything(42)

def sample_params():
    return {
        "hidden_size": random.choice([8, 16, 24, 32]),
        "lstm_layers": random.choice([1, 2]),
        "dropout": random.uniform(0.1, 0.4),
        "attention_head_size": random.choice([1, 2, 4]),
        "learning_rate": random.uniform(1e-4, 3e-3),
    }

N_RS = 10  # cukup buat skripsi

results = []

for i in range(N_RS):
    params = sample_params()

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=params["hidden_size"],
        lstm_layers=params["lstm_layers"],
        dropout=params["dropout"],
        attention_head_size=params["attention_head_size"],
        learning_rate=params["learning_rate"],
        output_size=1,
        loss=RMSE(),
        log_interval=10,
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        enable_progress_bar=True,
        callbacks=[EarlyStopping(monitor="val_loss", patience=3)]
    )

    trainer.fit(model, train_loader, val_loader)

    val_loss = trainer.callback_metrics["val_loss"].item()
    results.append((val_loss, params))

    print(f"[Trial {i+1}/{N_RS}] val_loss={val_loss:.4f} params={params}")

best_loss, best = sorted(results, key=lambda x: x[0])[0]
print("\n===== BEST RANDOM SEARCH PARAMS =====")
print("val_loss:", best_loss)
print("params  :", best)


Seed set to 42
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectio

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_sel

[Trial 1/10] val_loss=474.6697 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.32246514992794995, 'attention_head_size': 1, 'learning_rate': 0.0007473111406315861}


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 2/10] val_loss=668.5854 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.2771477537347119, 'attention_head_size': 1, 'learning_rate': 0.000186411936370404}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


[Trial 3/10] val_loss=328.5173 params={'hidden_size': 16, 'lstm_layers': 1, 'dropout': 0.25160658643100875, 'attention_head_size': 1, 'learning_rate': 0.0017276106825219778}



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 31.8 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 7.1 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 4.3 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[Trial 4/10] val_loss=301.7665 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.2347627138851561, 'attention_head_size': 2, 'learning_rate': 0.0024473483243656973}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 5/10] val_loss=454.6925 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.3094418184964681, 'attention_head_size': 2, 'learning_rate': 0.0009058268908477614}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


[Trial 6/10] val_loss=429.7067 params={'hidden_size': 16, 'lstm_layers': 2, 'dropout': 0.13066308295595463, 'attention_head_size': 2, 'learning_rate': 0.00038047749281704563}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 26.1 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 5.8 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 7/10] val_loss=360.5050 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.3421384819823141, 'attention_head_size': 4, 'learning_rate': 0.0014323321510483435}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 8/10] val_loss=344.5334 params={'hidden_size': 8, 'lstm_layers': 2, 'dropout': 0.12364005942353747, 'attention_head_size': 2, 'learning_rate': 0.002505273526333685}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


[Trial 9/10] val_loss=354.9626 params={'hidden_size': 24, 'lstm_layers': 1, 'dropout': 0.3113715508644771, 'attention_head_size': 1, 'learning_rate': 0.0020176636238463594}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 26.1 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 5.8 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

[Trial 10/10] val_loss=477.8587 params={'hidden_size': 24, 'lstm_layers': 1, 'dropout': 0.3565953163045441, 'attention_head_size': 1, 'learning_rate': 0.001202366052613582}

===== BEST RANDOM SEARCH PARAMS =====
val_loss: 301.7664794921875
params  : {'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.2347627138851561, 'attention_head_size': 2, 'learning_rate': 0.0024473483243656973}


In [78]:
import optuna
from pytorch_forecasting.metrics import RMSE
from pytorch_forecasting import TemporalFusionTransformer
from lightning.pytorch import Trainer

def objective(trial):
    # fix yang bagus dari RS, yang di-finetune cuma yang masuk akal
    hidden_size = trial.suggest_categorical("hidden_size", [best["hidden_size"]])
    lstm_layers = trial.suggest_categorical("lstm_layers", [best["lstm_layers"]])
    attention   = trial.suggest_categorical("attention_head_size", [best["attention_head_size"]])

    dropout = trial.suggest_float(
        "dropout",
        max(best["dropout"] - 0.1, 0.05),
        min(best["dropout"] + 0.1, 0.5)
    )

    lr = trial.suggest_float(
        "learning_rate",
        max(best["learning_rate"] * 0.5, 1e-4),
        best["learning_rate"] * 1.5
    )

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=hidden_size,
        lstm_layers=lstm_layers,
        dropout=dropout,
        attention_head_size=attention,
        learning_rate=lr,
        output_size=1,
        loss=RMSE(),
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        enable_progress_bar=False
    )

    trainer.fit(model, train_loader, val_loader)

    val_loss = trainer.callback_metrics["val_loss"].item()
    return val_loss

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)   # 10 trial BO cukup buat skripsi

final_params = study.best_params
print("\n===== BEST BO PARAMS =====")
print(final_params)


[I 2025-11-14 09:52:23,246] A new study created in memory with name: no-name-4593eb6b-2433-44e8-ab97-22107e755218
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | Variable


===== BEST BO PARAMS =====
{'hidden_size': 32, 'lstm_layers': 1, 'attention_head_size': 2, 'dropout': 0.1607588981551044, 'learning_rate': 0.0030631685324575613}


In [79]:
from pytorch_forecasting.metrics import RMSE
from pytorch_forecasting import TemporalFusionTransformer
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

# final_params dari Optuna:
print("FINAL PARAMS (BO):", final_params)

final_model = TemporalFusionTransformer.from_dataset(
    training_ds,
    hidden_size=final_params["hidden_size"],
    lstm_layers=final_params["lstm_layers"],
    dropout=final_params["dropout"],
    attention_head_size=final_params["attention_head_size"],
    learning_rate=final_params["learning_rate"],
    output_size=1,
    loss=RMSE(),
)

checkpoint_cb = ModelCheckpoint(
    dirpath="tft_checkpoints_120sku",
    filename="tft_120sku_final",
    monitor="val_loss",
    mode="min",
    save_top_k=1
)

early_cb = EarlyStopping(
    monitor="val_loss",
    patience=8,
    mode="min"
)

final_trainer = Trainer(
    max_epochs=50,
    accelerator="cpu",
    enable_progress_bar=True,
    callbacks=[early_cb, checkpoint_cb]
)

final_trainer.fit(final_model, train_loader, val_loader)

print("Checkpoint terbaik:", checkpoint_cb.best_model_path)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 31.8 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 7.1 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | sta

FINAL PARAMS (BO): {'hidden_size': 32, 'lstm_layers': 1, 'attention_head_size': 2, 'dropout': 0.1607588981551044, 'learning_rate': 0.0030631685324575613}


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Checkpoint terbaik: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_120sku\tft_120sku_final.ckpt


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from pytorch_forecasting.data import TimeSeriesDataSet

# asumsi: df, training_ds, final_model sudah ada

PROJECT_ROOT = Path(r"D:\Documents\Skripsi\demand-forecasting")
OUT_DIR      = PROJECT_ROOT / "outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ---------- PREDIKSI TRAIN (boleh pakai kode lama kamu) ----------
train_raw = final_model.predict(training_ds, return_index=True)
train_pred = train_raw.output.squeeze(-1)
train_idx  = train_raw.index

df_train_pred = pd.DataFrame({
    "cabang":   train_idx["cabang"],
    "sku":      train_idx["sku"],
    "time_idx": train_idx["time_idx"],
    "qty_pred": train_pred,
}).merge(
    df[["cabang","sku","time_idx","qty","periode","is_train"]],
    on=["cabang","sku","time_idx"],
    how="left"
)

df_train_pred = df_train_pred[df_train_pred["is_train"] == 1].copy()

# ---------- PREDIKSI FULL (train+val+test) ----------
full_ds = TimeSeriesDataSet.from_dataset(
    training_ds,
    df,
    stop_randomization=True
)

full_raw  = final_model.predict(full_ds, return_index=True)
full_pred = full_raw.output.squeeze(-1)
full_idx  = full_raw.index

df_pred_all = pd.DataFrame({
    "cabang":   full_idx["cabang"],
    "sku":      full_idx["sku"],
    "time_idx": full_idx["time_idx"],
    "qty_pred": full_pred,
}).merge(
    df[["cabang","sku","time_idx","qty","periode","is_test"]],
    on=["cabang","sku","time_idx"],
    how="left"
)

# ---------- COLLAPSE DUPLIKAT WINDOW ----------
# Di sini yang tadinya Oktober (time_idx 45) keprediksi berkali-kali
# kita ratakan per kombinasi unik
df_pred_all = (
    df_pred_all
    .groupby(["cabang","sku","time_idx","periode"], as_index=False)
    .agg(
        qty_pred=("qty_pred", "mean"),
        qty=("qty", "first"),
        is_test=("is_test", "max")
    )
)

# ---------- FILTER HANYA PERIODE TEST (Junâ€“Okt 2024) ----------
EVAL_START = pd.Timestamp("2024-06-01")
EVAL_END   = pd.Timestamp("2024-10-01")

df_test_pred = df_pred_all[
    (df_pred_all["is_test"] == 1) &
    (df_pred_all["periode"] >= EVAL_START) &
    (df_pred_all["periode"] <= EVAL_END)
].copy()

print("Train pred shape:", df_train_pred.shape)
print("Test  pred shape:", df_test_pred.shape)
print("Unique n_test per seri:",
      df_test_pred.groupby(["cabang","sku"])["periode"].nunique().unique())



ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Train pred shape: (2160, 7)
Test  pred shape: (45, 7)
Unique n_test per seri: [5]


In [88]:
def calc_metrics(dfm):
    dfm = dfm.dropna(subset=["qty","qty_pred"]).copy()

    err = dfm["qty_pred"] - dfm["qty"]

    rmse = float(np.sqrt(np.mean(err**2)))
    mae  = float(np.mean(np.abs(err)))

    nonzero = dfm["qty"] != 0
    if nonzero.sum() > 0:
        mape = float(
            np.mean(
                np.abs(dfm.loc[nonzero, "qty_pred"] - dfm.loc[nonzero, "qty"])
                / dfm.loc[nonzero, "qty"]
            ) * 100
        )
    else:
        mape = np.nan

    smape = float(
        np.mean(
            2 * np.abs(dfm["qty_pred"] - dfm["qty"])
            / (np.abs(dfm["qty_pred"]) + np.abs(dfm["qty"]) + 1e-9)
        ) * 100
    )

    mse = float(np.mean(err**2))

    return rmse, mae, mape, smape, mse

# ---------- METRIC GLOBAL ----------
rmse_tr, mae_tr, mape_tr, smape_tr, mse_tr = calc_metrics(df_train_pred)
rmse_te, mae_te, mape_te, smape_te, mse_te = calc_metrics(df_test_pred)

summary = pd.DataFrame([{
    "RMSE_train": rmse_tr,
    "MAE_train": mae_tr,
    "MAPE_train": mape_tr,
    "sMAPE_train": smape_tr,
    "MSE_train": mse_tr,
    "RMSE_test": rmse_te,
    "MAE_test": mae_te,
    "MAPE_test": mape_te,
    "sMAPE_test": smape_te,
    "MSE_test": mse_te,
}])

# ---------- METRIC PER CABANG + SKU (hanya yang ada test) ----------
rows = []
for (cab, sku), g in df_test_pred.groupby(["cabang","sku"], sort=False):
    rmse, mae, mape, smape, mse = calc_metrics(g)
    rows.append({
        "cabang": cab,
        "sku": sku,
        "n_test": len(g),
        "MSE_test": mse,
        "RMSE_test": rmse,
        "MAE_test": mae,
        "MAPE%_test": mape,
        "sMAPE%_test": smape,
    })

metrics_by_series = pd.DataFrame(rows)

# ---------- SAVE KE FOLDER OUTPUTS ----------
train_path   = OUT_DIR / "tft_120sku_train_predictions.csv"
test_path    = OUT_DIR / "tft_120sku_test_predictions.csv"
summary_path = OUT_DIR / "tft_120sku_metrics_summary.csv"
series_path  = OUT_DIR / "tft_120sku_metrics_by_series.csv"

df_train_pred.to_csv(train_path, index=False)
df_test_pred.to_csv(test_path, index=False)
summary.to_csv(summary_path, index=False)
metrics_by_series.to_csv(series_path, index=False)

print("\nSaved:")
print(" -", train_path)
print(" -", test_path)
print(" -", summary_path)
print(" -", series_path)



Saved:
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_120sku_train_predictions.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_120sku_test_predictions.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_120sku_metrics_summary.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_120sku_metrics_by_series.csv


coba yg spike jadi flag

In [99]:
from pathlib import Path
import numpy as np
import pandas as pd
import random

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import RMSE

from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

In [100]:
PROJECT_ROOT   = Path(r"D:\Documents\Skripsi\demand-forecasting")
DATASET15_DIR  = PROJECT_ROOT / "data" / "dataset_15"
OUT_DIR        = PROJECT_ROOT / "outputs"
CKPT_DIR       = PROJECT_ROOT / "tft_checkpoints_eligible"

for d in [DATASET15_DIR, OUT_DIR, CKPT_DIR]:
    d.mkdir(parents=True, exist_ok=True)

WIN_START  = pd.Timestamp("2021-01-01")
TRAIN_END  = pd.Timestamp("2024-05-01")
EVAL_START = pd.Timestamp("2024-06-01")
EVAL_END   = pd.Timestamp("2024-10-01")

seed_everything(42)

Seed set to 42


42

In [101]:
df = pd.read_csv(
    DATASET15_DIR / "tft_dataset_15_fullfeat.csv",
    parse_dates=["periode"]
)

# sort & time_idx per seri
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)
df["time_idx"] = df.groupby(["cabang", "sku"]).cumcount()

In [102]:
def build_series_info(df_src: pd.DataFrame,
                      win_start: pd.Timestamp,
                      train_end: pd.Timestamp) -> pd.DataFrame:
    w = df_src.query("periode >= @win_start and periode <= @train_end").copy()

    # has_may24 (periode persis 2024-05-01)
    has_may = (
        w.query("periode == @train_end")
         .groupby(["cabang", "sku"], as_index=False)
         .size()
         .rename(columns={"size": "has_may24"})
    )

    # agregat dasar
    agg = (
        w.groupby(["cabang", "sku"], as_index=False)
         .agg(
             n_months=("qty", "size"),
             nonzero_months=("qty", lambda s: (s > 0).sum()),
             total_qty=("qty", "sum")
         )
    )

    # 12 bulan terakhir
    last12 = (
        w.sort_values(["cabang", "sku", "periode"])
         .groupby(["cabang", "sku"], as_index=False)
         .agg(qty_12m=("qty", lambda s: s.tail(12).sum()))
    )

    # 6 bulan terakhir
    last6 = (
        w.sort_values(["cabang", "sku", "periode"])
         .groupby(["cabang", "sku"], as_index=False)
         .agg(qty_6m=("qty", lambda s: s.tail(6).sum()))
    )

    # zero_ratio_train, n_train, last_nz pakai is_train
    t = df_src.loc[df_src["is_train"] == 1].copy()

    zr = (
        t.groupby(["cabang", "sku"], as_index=False)["qty"]
         .apply(lambda s: (s == 0).mean())
         .rename(columns={"qty": "zero_ratio_train"})
    )

    ntr = (
        t.groupby(["cabang", "sku"], as_index=False)["qty"]
         .count()
         .rename(columns={"qty": "n_train"})
    )

    nz = (
        t.loc[t["qty"] > 0]
         .groupby(["cabang", "sku"], as_index=False)["periode"]
         .max()
         .rename(columns={"periode": "last_nz"})
    )

    info = (
        agg.merge(last12, on=["cabang", "sku"], how="left")
           .merge(last6,  on=["cabang", "sku"], how="left")
           .merge(has_may, on=["cabang", "sku"], how="left")
           .merge(zr, on=["cabang", "sku"], how="left")
           .merge(ntr, on=["cabang", "sku"], how="left")
           .merge(nz, on=["cabang", "sku"], how="left")
    )

    info["has_may24"] = info["has_may24"].fillna(0).gt(0)

    for c in ["zero_ratio_train", "n_train", "qty_12m", "qty_6m"]:
        if c in info.columns:
            info[c] = info[c].fillna(0)

    # months_since_last_nz
    info["last_nz"] = pd.to_datetime(info["last_nz"], errors="coerce")
    train_end_per = train_end.to_period("M")

    info["months_since_last_nz"] = 999
    mask = info["last_nz"].notna()
    if mask.any():
        last_nz_per = info.loc[mask, "last_nz"].dt.to_period("M")
        diff_months = train_end_per.ordinal - last_nz_per.astype("int64")
        info.loc[mask, "months_since_last_nz"] = diff_months.values

    info["months_since_last_nz"] = info["months_since_last_nz"].astype("int64")

    # alive_recent: masih ada penjualan dekat akhir train
    info["alive_recent"] = (
        (info["qty_6m"] > 0) &
        (info["months_since_last_nz"] <= 3)
    ).astype(int)

    return info


print("Train end date:", TRAIN_END)
series_info = build_series_info(df, WIN_START, TRAIN_END)

info_path = DATASET15_DIR / "series_info_full.csv"
series_info.to_csv(info_path, index=False)

print("Saved series_info_full:", info_path)
print("Total seri:", len(series_info))


Train end date: 2024-05-01 00:00:00
Saved series_info_full: D:\Documents\Skripsi\demand-forecasting\data\dataset_15\series_info_full.csv
Total seri: 120


In [103]:
series_info["eligible_tft"] = (
    (series_info["n_train"] >= 30) &           # cukup panjang train
    (series_info["zero_ratio_train"] <= 0.4) & # bulan nol tidak mendominasi
    (series_info["nonzero_months"] >= 20) &    # banyak bulan laku
    (series_info["qty_12m"] >= 200) &          # volume 12 bulan terakhir
    (series_info["alive_recent"] == 1)         # masih "hidup" dekat 2024
).astype(int)

print("Eligible TFT value_counts:")
print(series_info["eligible_tft"].value_counts())

series_info.to_csv(info_path, index=False)

# merge ke df
df = df.merge(
    series_info[["cabang", "sku", "eligible_tft"]],
    on=["cabang", "sku"],
    how="left"
)

df["eligible_tft"] = df["eligible_tft"].fillna(0).astype(int)

# filter hanya seri yang eligible_tft == 1
df = df[df["eligible_tft"] == 1].copy()
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

n_series_tft = df.drop_duplicates(subset=["cabang", "sku"]).shape[0]
print("Seri dipakai TFT:", n_series_tft)

# rebuild time_idx setelah filter
df["time_idx"] = df.groupby(["cabang", "sku"]).cumcount()


Eligible TFT value_counts:
eligible_tft
1    120
Name: count, dtype: int64
Seri dipakai TFT: 120


In [104]:
rolling_cols = [
    "qty_rollmean_3","qty_rollstd_3",
    "qty_rollmean_6","qty_rollstd_6",
    "qty_rollmean_12","qty_rollstd_12",
]
rolling_cols = [c for c in rolling_cols if c in df.columns]

lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

# rolling: bfill/ffill per seri
if rolling_cols:
    df[rolling_cols] = (
        df.groupby(["cabang", "sku"])[rolling_cols]
          .transform(lambda g: g.bfill().ffill())
    )

# lag qty: NaN -> 0
if lag_cols:
    df[lag_cols] = df[lag_cols].fillna(0)

# lag exog: NaN -> 0
for col in ["event_flag_lag1", "holiday_count_lag1", "rainfall_lag1"]:
    if col in df.columns:
        df[col] = df[col].fillna(0)

# spike_flag NaN -> 0 (kalau ada)
if "spike_flag" in df.columns:
    df["spike_flag"] = df["spike_flag"].fillna(0)

na_left = df.isna().sum()
print("Sisa NA:")
print(na_left[na_left > 0])

Sisa NA:
Series([], dtype: int64)


In [105]:
train_df = df[df["is_train"] == 1].copy()

print("Baris train:", len(train_df))
print("Range time_idx train:", train_df["time_idx"].min(), "->", train_df["time_idx"].max())

# 6 bulan terakhir jadi validation internal
training_cutoff = train_df["time_idx"].max() - 6
print("training_cutoff:", training_cutoff)

static_cat = ["cabang", "sku"]

known_reals = [
    "time_idx",
    "event_flag", "event_flag_lag1",
    "holiday_count", "holiday_count_lag1",
    "rainfall_lag1",
]

# pakai spike_flag kalau ada
if "spike_flag" in df.columns:
    known_reals.append("spike_flag")

# tambahkan kalender kalau ada
for col in ["month", "year", "qtr"]:
    if col in df.columns and col not in known_reals:
        known_reals.append(col)

unknown_reals = ["qty"] + rolling_cols + lag_cols

max_prediction_length = 1
min_encoder_length   = 12
max_encoder_length   = 24

training_ds = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="qty",
    group_ids=["cabang", "sku"],
    weight="sample_weight",     # sample_weight sudah ada di dataset kamu

    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,

    static_categoricals=static_cat,
    static_reals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=known_reals,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=unknown_reals,

    target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,

    min_prediction_idx=training_cutoff + 1,
)

batch_size = 64

train_loader = training_ds.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_loader = training_ds.to_dataloader(
    train=False,
    batch_size=batch_size,
    num_workers=0,
)

print("Jumlah sample train:", len(train_loader.dataset))
print("Jumlah sample val  :", len(val_loader.dataset))


Baris train: 4920
Range time_idx train: 0 -> 40
training_cutoff: 34
Jumlah sample train: 2160
Jumlah sample val  : 2160


In [106]:
def sample_params():
    return {
        "hidden_size":         random.choice([16, 24, 32]),
        "lstm_layers":         random.choice([1, 2]),
        "dropout":             random.uniform(0.1, 0.3),
        "attention_head_size": random.choice([2, 4]),
        "learning_rate":       random.uniform(1e-3, 3e-3),
    }

N_RS = 10
rs_results = []

for i in range(N_RS):
    params = sample_params()

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=params["hidden_size"],
        lstm_layers=params["lstm_layers"],
        dropout=params["dropout"],
        attention_head_size=params["attention_head_size"],
        learning_rate=params["learning_rate"],
        output_size=1,
        loss=RMSE(),
        log_interval=10,
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        enable_progress_bar=True,
        callbacks=[EarlyStopping(monitor="val_loss", patience=3)]
    )

    trainer.fit(model, train_loader, val_loader)

    val_loss = trainer.callback_metrics["val_loss"].item()
    rs_results.append((val_loss, params))

    print(f"[RS Trial {i+1}/{N_RS}] val_loss={val_loss:.4f} params={params}")

best_loss, best = sorted(rs_results, key=lambda x: x[0])[0]
print("\n===== BEST RANDOM SEARCH PARAMS =====")
print("val_loss:", best_loss)
print("params  :", best)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[RS Trial 1/10] val_loss=247.9216 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.10500215104453339, 'attention_head_size': 4, 'learning_rate': 0.0014897837076069524}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


[RS Trial 2/10] val_loss=285.5254 params={'hidden_size': 16, 'lstm_layers': 1, 'dropout': 0.23533989748458226, 'attention_head_size': 2, 'learning_rate': 0.002180985024898079}



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 21.4 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 7.4 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


[RS Trial 3/10] val_loss=268.1265 params={'hidden_size': 16, 'lstm_layers': 1, 'dropout': 0.11873904797231849, 'attention_head_size': 2, 'learning_rate': 0.002010710576206725}



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 21.4 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 7.4 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[RS Trial 4/10] val_loss=295.8825 params={'hidden_size': 16, 'lstm_layers': 1, 'dropout': 0.24320392258448068, 'attention_head_size': 4, 'learning_rate': 0.0014408812440813933}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[RS Trial 5/10] val_loss=327.7219 params={'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.2618860913355653, 'attention_head_size': 2, 'learning_rate': 0.0025176147342595347}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[RS Trial 6/10] val_loss=301.6441 params={'hidden_size': 16, 'lstm_layers': 2, 'dropout': 0.16805010330359837, 'attention_head_size': 2, 'learning_rate': 0.0014306275242151776}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


[RS Trial 7/10] val_loss=241.2613 params={'hidden_size': 24, 'lstm_layers': 1, 'dropout': 0.11854916867602959, 'attention_head_size': 2, 'learning_rate': 0.0017179587609692567}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 30.0 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 9.5 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


[RS Trial 8/10] val_loss=311.2032 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.261425654654876, 'attention_head_size': 4, 'learning_rate': 0.0020724561829094015}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 30.0 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 9.5 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[RS Trial 9/10] val_loss=295.9211 params={'hidden_size': 24, 'lstm_layers': 1, 'dropout': 0.2104081262546454, 'attention_head_size': 4, 'learning_rate': 0.002154704290513524}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[RS Trial 10/10] val_loss=234.7875 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.10916487673113245, 'attention_head_size': 2, 'learning_rate': 0.002546136681577384}

===== BEST RANDOM SEARCH PARAMS =====
val_loss: 234.78750610351562
params  : {'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.10916487673113245, 'attention_head_size': 2, 'learning_rate': 0.002546136681577384}


In [107]:
import optuna

def objective(trial):
    hidden_size = trial.suggest_categorical("hidden_size", [best["hidden_size"]])
    lstm_layers = trial.suggest_categorical("lstm_layers", [best["lstm_layers"]])
    attention   = trial.suggest_categorical("attention_head_size", [best["attention_head_size"]])

    dropout = trial.suggest_float(
        "dropout",
        max(best["dropout"] - 0.1, 0.05),
        min(best["dropout"] + 0.1, 0.4)
    )

    lr = trial.suggest_float(
        "learning_rate",
        max(best["learning_rate"] * 0.5, 1e-4),
        best["learning_rate"] * 1.5
    )

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=hidden_size,
        lstm_layers=lstm_layers,
        dropout=dropout,
        attention_head_size=attention,
        learning_rate=lr,
        output_size=1,
        loss=RMSE(),
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        enable_progress_bar=False,
        callbacks=[EarlyStopping(monitor="val_loss", patience=3)]
    )

    trainer.fit(model, train_loader, val_loader)

    val_loss = trainer.callback_metrics["val_loss"].item()
    return val_loss


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

final_params = study.best_params
print("\n===== BEST BO PARAMS =====")
print(final_params)

[I 2025-11-14 14:54:23,760] A new study created in memory with name: no-name-44756df3-611d-4c11-9636-a7bceb3f05c1
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | Variable


===== BEST BO PARAMS =====
{'hidden_size': 32, 'lstm_layers': 1, 'attention_head_size': 2, 'dropout': 0.19367880779764485, 'learning_rate': 0.0026153786373433216}


In [108]:
final_model = TemporalFusionTransformer.from_dataset(
    training_ds,
    hidden_size=final_params["hidden_size"],
    lstm_layers=final_params["lstm_layers"],
    dropout=final_params["dropout"],
    attention_head_size=final_params["attention_head_size"],
    learning_rate=final_params["learning_rate"],
    output_size=1,
    loss=RMSE(),
)

checkpoint_cb = ModelCheckpoint(
    dirpath=CKPT_DIR,
    filename="tft_eligible_final",
    monitor="val_loss",
    mode="min",
    save_top_k=1
)

early_cb = EarlyStopping(
    monitor="val_loss",
    patience=8,
    mode="min"
)

final_trainer = Trainer(
    max_epochs=50,
    accelerator="cpu",
    enable_progress_bar=True,
    callbacks=[early_cb, checkpoint_cb]
)

final_trainer.fit(final_model, train_loader, val_loader)

print("Checkpoint terbaik:", checkpoint_cb.best_model_path)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | RMSE                            | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 528    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 38.0 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 11.6 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


Checkpoint terbaik: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_eligible\tft_eligible_final.ckpt


In [109]:
# ---------- PREDIKSI TRAIN ----------
train_raw = final_model.predict(training_ds, return_index=True)
train_pred = train_raw.output.squeeze(-1)
train_idx  = train_raw.index

df_train_pred = pd.DataFrame({
    "cabang":   train_idx["cabang"],
    "sku":      train_idx["sku"],
    "time_idx": train_idx["time_idx"],
    "qty_pred": train_pred,
}).merge(
    df[["cabang", "sku", "time_idx", "qty", "periode", "is_train"]],
    on=["cabang", "sku", "time_idx"],
    how="left"
)

df_train_pred = df_train_pred[df_train_pred["is_train"] == 1].copy()

# ---------- PREDIKSI FULL (train+val+test) ----------
full_ds = TimeSeriesDataSet.from_dataset(
    training_ds,
    df,
    stop_randomization=True
)

full_raw  = final_model.predict(full_ds, return_index=True)
full_pred = full_raw.output.squeeze(-1)
full_idx  = full_raw.index

df_pred_all = pd.DataFrame({
    "cabang":   full_idx["cabang"],
    "sku":      full_idx["sku"],
    "time_idx": full_idx["time_idx"],
    "qty_pred": full_pred,
}).merge(
    df[["cabang","sku","time_idx","qty","periode","is_test"]],
    on=["cabang","sku","time_idx"],
    how="left"
)

# collapse duplikat window per (cabang, sku, time_idx, periode)
df_pred_all = (
    df_pred_all
    .groupby(["cabang","sku","time_idx","periode"], as_index=False)
    .agg(
        qty_pred=("qty_pred", "mean"),
        qty=("qty", "first"),
        is_test=("is_test", "max")
    )
)

# filter hanya periode test Junâ€“Okt 2024
df_test_pred = df_pred_all[
    (df_pred_all["is_test"] == 1) &
    (df_pred_all["periode"] >= EVAL_START) &
    (df_pred_all["periode"] <= EVAL_END)
].copy()

print("Train pred shape:", df_train_pred.shape)
print("Test  pred shape:", df_test_pred.shape)
print("Unique n_test per seri:",
      df_test_pred.groupby(["cabang","sku"])["periode"].nunique().unique())

# ---------- METRIC ----------
def calc_metrics(dfm: pd.DataFrame):
    dfm = dfm.dropna(subset=["qty", "qty_pred"]).copy()

    err = dfm["qty_pred"] - dfm["qty"]

    mse  = float(np.mean(err**2))
    rmse = float(np.sqrt(mse))
    mae  = float(np.mean(np.abs(err)))

    nonzero = dfm["qty"] != 0
    if nonzero.sum() > 0:
        mape = float(
            np.mean(
                np.abs(dfm.loc[nonzero, "qty_pred"] - dfm.loc[nonzero, "qty"])
                / dfm.loc[nonzero, "qty"]
            ) * 100
        )
    else:
        mape = np.nan

    smape = float(
        np.mean(
            2 * np.abs(dfm["qty_pred"] - dfm["qty"])
            / (np.abs(dfm["qty_pred"]) + np.abs(dfm["qty"]) + 1e-9)
        ) * 100
    )

    return rmse, mae, mape, smape, mse


rmse_tr, mae_tr, mape_tr, smape_tr, mse_tr = calc_metrics(df_train_pred)
rmse_te, mae_te, mape_te, smape_te, mse_te = calc_metrics(df_test_pred)

summary = pd.DataFrame([{
    "RMSE_train": rmse_tr,
    "MAE_train": mae_tr,
    "MAPE_train": mape_tr,
    "sMAPE_train": smape_tr,
    "MSE_train": mse_tr,
    "RMSE_test": rmse_te,
    "MAE_test": mae_te,
    "MAPE_test": mape_te,
    "sMAPE_test": smape_te,
    "MSE_test": mse_te,
}])

# metric per cabang+sku (yang ada test)
rows = []
for (cab, sku), g in df_test_pred.groupby(["cabang", "sku"], sort=False):
    rmse, mae, mape, smape, mse = calc_metrics(g)
    rows.append({
        "cabang": cab,
        "sku": sku,
        "n_test": len(g),
        "MSE_test": mse,
        "RMSE_test": rmse,
        "MAE_test": mae,
        "MAPE%_test": mape,
        "sMAPE%_test": smape,
    })

metrics_by_series = pd.DataFrame(rows)

# ---------- SAVE ----------
train_path   = OUT_DIR / "tft_eligible_train_predictions.csv"
test_path    = OUT_DIR / "tft_eligible_test_predictions.csv"
summary_path = OUT_DIR / "tft_eligible_metrics_summary.csv"
series_path  = OUT_DIR / "tft_eligible_metrics_by_series.csv"

df_train_pred.to_csv(train_path, index=False)
df_test_pred.to_csv(test_path, index=False)
summary.to_csv(summary_path, index=False)
metrics_by_series.to_csv(series_path, index=False)

print("\nSaved:")
print(" -", train_path)
print(" -", test_path)
print(" -", summary_path)
print(" -", series_path)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Train pred shape: (2160, 7)
Test  pred shape: (45, 7)
Unique n_test per seri: [5]

Saved:
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_eligible_train_predictions.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_eligible_test_predictions.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_eligible_metrics_summary.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_eligible_metrics_by_series.csv


cb lebi bgs

In [111]:
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import numpy as np
import pandas as pd

import torch
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

# =====================================================
# PATH & CONFIG
# =====================================================
PROJECT_ROOT  = Path(r"D:\Documents\Skripsi\demand-forecasting")
DATASET15_DIR = PROJECT_ROOT / "data" / "dataset_15"
OUT_DIR       = PROJECT_ROOT / "outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

TFT_CHECKPOINT_DIR = PROJECT_ROOT / "tft_checkpoints_15sku_log"
TFT_CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)

EVAL_START = pd.Timestamp("2024-06-01")
EVAL_END   = pd.Timestamp("2024-10-01")

seed_everything(42)

# =====================================================
# 1. LOAD DATASET TFT & BANGUN SERIES_INFO
# =====================================================
df = pd.read_csv(
    DATASET15_DIR / "tft_dataset_15_fullfeat.csv",
    parse_dates=["periode"],
)

# pastikan sort
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

# info seri berdasarkan bagian train
train_mask = df["is_train"] == 1
info = (
    df.loc[train_mask]
      .groupby(["cabang", "sku"], as_index=False)
      .agg(
          n_train=("qty", "size"),
          zero_ratio_train=("qty", lambda s: (s == 0).mean()),
          total_qty=("qty", "sum"),
          first_train_periode=("periode", "min"),
          last_train_periode=("periode", "max"),
      )
)

# aturan kelayakan TFT
info["eligible_model"] = (
    (info["n_train"] >= 30) &
    (info["zero_ratio_train"] <= 0.5) &
    (info["total_qty"] > 0)
).astype(int)

info_path = DATASET15_DIR / "series_info_full.csv"
info.to_csv(info_path, index=False)
print("Saved series_info_full:", info_path)
print("Total seri:", len(info), "| Eligible:", info["eligible_model"].sum())

# merge ke df
df = df.merge(
    info[["cabang", "sku", "n_train", "zero_ratio_train", "eligible_model"]],
    on=["cabang", "sku"],
    how="left"
)

df["eligible_model"] = df["eligible_model"].fillna(0).astype(int)

# keep hanya seri yang eligible
df = df[df["eligible_model"] == 1].copy()
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

# time_idx per seri
df["time_idx"] = df.groupby(["cabang", "sku"]).cumcount()

# =====================================================
# 2. BERSIHKAN NA (LAG & ROLLING), SET SAMPLE_WEIGHT
# =====================================================
rolling_cols = [
    "qty_rollmean_3", "qty_rollstd_3",
    "qty_rollmean_6", "qty_rollstd_6",
    "qty_rollmean_12", "qty_rollstd_12",
]
rolling_cols = [c for c in rolling_cols if c in df.columns]

lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

# rolling: per seri, bfill lalu ffill
if rolling_cols:
    df[rolling_cols] = (
        df.groupby(["cabang", "sku"])[rolling_cols]
          .transform(lambda g: g.bfill().ffill())
    )

# lag qty: NaN -> 0
if lag_cols:
    df[lag_cols] = df[lag_cols].fillna(0)

# lag exog: NaN -> 0
for col in ["event_flag_lag1", "holiday_count_lag1", "rainfall_lag1"]:
    if col in df.columns:
        df[col] = df[col].fillna(0)

# spike_flag sebagai fitur, weight dibatasi
if "spike_flag" not in df.columns:
    df["spike_flag"] = 0

df["sample_weight"] = np.where(df["spike_flag"] == 1, 2.0, 1.0)

# target log1p
df["qty_log"] = np.log1p(df["qty"].clip(lower=0))

# cek NA terakhir
na_left = df.isna().sum()
na_left = na_left[na_left > 0]
print("Sisa NA setelah cleaning:")
print(na_left)

# =====================================================
# 3. BANGUN TimeSeriesDataSet (TARGET = qty_log)
# =====================================================
train_df = df[df["is_train"] == 1].copy()

print("Baris train:", len(train_df))
print("Range time_idx train:", train_df["time_idx"].min(), "->", train_df["time_idx"].max())

# 6 bulan terakhir train sebagai internal validasi
training_cutoff = train_df["time_idx"].max() - 6
print("training_cutoff:", training_cutoff)

static_cat = ["cabang", "sku"]

known_reals = [
    "time_idx",
    "event_flag", "event_flag_lag1",
    "holiday_count", "holiday_count_lag1",
    "rainfall_lag1",
    "spike_flag",
]

unknown_reals = rolling_cols + lag_cols  # target qty_log tidak dimasukkan sebagai covariate

max_prediction_length = 1
min_encoder_length = 12
max_encoder_length = 24

training_ds = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="qty_log",
    group_ids=["cabang", "sku"],
    weight="sample_weight",

    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,

    static_categoricals=static_cat,
    static_reals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=known_reals,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=unknown_reals,

    target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,

    # bagi internal train/val
    min_prediction_idx=training_cutoff + 1,
)

batch_size = 64

train_loader = training_ds.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_loader = training_ds.to_dataloader(
    train=False,
    batch_size=batch_size,
    num_workers=0,
)

print("Jumlah sample train:", len(train_loader.dataset))
print("Jumlah sample val  :", len(val_loader.dataset))

# =====================================================
# 4. RANDOM SEARCH PARAMETER (QuantileLoss q=0.5)
# =====================================================
def sample_params():
    return {
        "hidden_size": int(np.random.choice([8, 16, 24, 32])),
        "lstm_layers": int(np.random.choice([1, 2])),
        "dropout": float(np.random.uniform(0.1, 0.4)),
        "attention_head_size": int(np.random.choice([1, 2, 4])),
        "learning_rate": float(np.random.uniform(1e-4, 3e-3)),
    }


N_RS = 10
results = []

loss_q = QuantileLoss(quantiles=[0.5])  # robust ke spike, fokus median

for i in range(N_RS):
    params = sample_params()
    print(f"\n=== Random Search Trial {i+1}/{N_RS} ===")
    print(params)

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=int(params["hidden_size"]),
        lstm_layers=int(params["lstm_layers"]),
        dropout=float(params["dropout"]),
        attention_head_size=int(params["attention_head_size"]),
        learning_rate=float(params["learning_rate"]),
        loss=loss_q,
        output_size=1,
        log_interval=10,
        log_val_interval=1,
    )


    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        enable_progress_bar=True,
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=3, mode="min")
        ],
    )

    trainer.fit(model, train_loader, val_loader)

    val_loss = float(trainer.callback_metrics["val_loss"].item())
    results.append((val_loss, params))
    print(f"[Trial {i+1}] val_loss={val_loss:.4f} params={params}")

best_loss, best_params = sorted(results, key=lambda x: x[0])[0]
print("\n===== BEST RANDOM SEARCH PARAMS =====")
print("val_loss:", best_loss)
print("params  :", best_params)

# =====================================================
# 5. TRAIN FINAL MODEL DENGAN PARAMETER TERBAIK
# =====================================================
final_model = TemporalFusionTransformer.from_dataset(
    training_ds,
    hidden_size=best_params["hidden_size"],
    lstm_layers=best_params["lstm_layers"],
    dropout=best_params["dropout"],
    attention_head_size=best_params["attention_head_size"],
    learning_rate=best_params["learning_rate"],
    loss=loss_q,
    output_size=1,
)

checkpoint_cb = ModelCheckpoint(
    dirpath=TFT_CHECKPOINT_DIR,
    filename="tft_15sku_log_final",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

early_cb = EarlyStopping(
    monitor="val_loss",
    patience=8,
    mode="min",
)

final_trainer = Trainer(
    max_epochs=50,
    accelerator="cpu",
    enable_progress_bar=True,
    callbacks=[early_cb, checkpoint_cb],
)

final_trainer.fit(final_model, train_loader, val_loader)

print("Checkpoint terbaik:", checkpoint_cb.best_model_path)

# =====================================================
# 6. PREDIKSI TRAIN & FULL (BALIKKAN LOG -> LEVEL)
# =====================================================
# helper inverse transform
def inv_log(x):
    return np.expm1(x)

# ---------- TRAIN ----------
train_raw = final_model.predict(training_ds, return_index=True)
train_pred_log = train_raw.output.squeeze(-1)
train_idx = train_raw.index

df_train_pred = pd.DataFrame({
    "cabang":   train_idx["cabang"],
    "sku":      train_idx["sku"],
    "time_idx": train_idx["time_idx"],
    "qty_pred_log": train_pred_log,
})
df_train_pred["qty_pred"] = inv_log(df_train_pred["qty_pred_log"])

df_train_pred = df_train_pred.merge(
    df[["cabang", "sku", "time_idx", "qty", "periode", "is_train"]],
    on=["cabang", "sku", "time_idx"],
    how="left",
)

df_train_pred = df_train_pred[df_train_pred["is_train"] == 1].copy()

# ---------- FULL ----------
full_ds = TimeSeriesDataSet.from_dataset(
    training_ds,
    df,
    stop_randomization=True,
)

full_raw = final_model.predict(full_ds, return_index=True)
full_pred_log = full_raw.output.squeeze(-1)
full_idx = full_raw.index

df_pred_all = pd.DataFrame({
    "cabang":   full_idx["cabang"],
    "sku":      full_idx["sku"],
    "time_idx": full_idx["time_idx"],
    "qty_pred_log": full_pred_log,
})
df_pred_all["qty_pred"] = inv_log(df_pred_all["qty_pred_log"])

df_pred_all = df_pred_all.merge(
    df[["cabang", "sku", "time_idx", "qty", "periode", "is_test"]],
    on=["cabang", "sku", "time_idx"],
    how="left",
)

# collapse duplikat window: rata-rata prediksi per bulan
df_pred_all = (
    df_pred_all
    .groupby(["cabang", "sku", "time_idx", "periode"], as_index=False)
    .agg(
        qty_pred_log=("qty_pred_log", "mean"),
        qty_pred=("qty_pred", "mean"),
        qty=("qty", "first"),
        is_test=("is_test", "max"),
    )
)

# filter 5 bulan test
df_test_pred = df_pred_all[
    (df_pred_all["is_test"] == 1) &
    (df_pred_all["periode"] >= EVAL_START) &
    (df_pred_all["periode"] <= EVAL_END)
].copy()

print("Train pred shape:", df_train_pred.shape)
print("Test  pred shape:", df_test_pred.shape)
print("Unique n_test per seri:",
      df_test_pred.groupby(["cabang", "sku"])["periode"].nunique().unique())

# =====================================================
# 7. HITUNG METRIK (RMSE / MAE / MAPE / sMAPE / MSE)
# =====================================================
def calc_metrics(dfm):
    dfm = dfm.dropna(subset=["qty", "qty_pred"]).copy()
    err = dfm["qty_pred"] - dfm["qty"]

    mse  = float(np.mean(err**2))
    rmse = float(np.sqrt(mse))
    mae  = float(np.mean(np.abs(err)))

    nonzero = dfm["qty"] != 0
    if nonzero.sum() > 0:
        mape = float(
            np.mean(
                np.abs(dfm.loc[nonzero, "qty_pred"] - dfm.loc[nonzero, "qty"])
                / dfm.loc[nonzero, "qty"]
            ) * 100
        )
    else:
        mape = np.nan

    smape = float(
        np.mean(
            2 * np.abs(dfm["qty_pred"] - dfm["qty"])
            / (np.abs(dfm["qty_pred"]) + np.abs(dfm["qty"]) + 1e-9)
        ) * 100
    )

    return rmse, mae, mape, smape, mse

# global
rmse_tr, mae_tr, mape_tr, smape_tr, mse_tr = calc_metrics(df_train_pred)
rmse_te, mae_te, mape_te, smape_te, mse_te = calc_metrics(df_test_pred)

summary = pd.DataFrame([{
    "RMSE_train": rmse_tr,
    "MAE_train": mae_tr,
    "MAPE%_train": mape_tr,
    "sMAPE%_train": smape_tr,
    "MSE_train": mse_tr,
    "RMSE_test": rmse_te,
    "MAE_test": mae_te,
    "MAPE%_test": mape_te,
    "sMAPE%_test": smape_te,
    "MSE_test": mse_te,
}])

# per seri 5 bulan test
rows = []
for (cab, sku), g in df_test_pred.groupby(["cabang", "sku"], sort=False):
    rmse, mae, mape, smape, mse = calc_metrics(g)
    rows.append({
        "cabang": cab,
        "sku": sku,
        "n_test": len(g),
        "MSE_test": mse,
        "RMSE_test": rmse,
        "MAE_test": mae,
        "MAPE%_test": mape,
        "sMAPE%_test": smape,
    })

metrics_by_series = pd.DataFrame(rows)

# =====================================================
# 8. SAVE OUTPUT
# =====================================================
train_path   = OUT_DIR / "tft_15sku_log_train_predictions.csv"
test_path    = OUT_DIR / "tft_15sku_log_test_predictions.csv"
summary_path = OUT_DIR / "tft_15sku_log_metrics_summary.csv"
series_path  = OUT_DIR / "tft_15sku_log_metrics_by_series.csv"

df_train_pred.to_csv(train_path, index=False)
df_test_pred.to_csv(test_path, index=False)
summary.to_csv(summary_path, index=False)
metrics_by_series.to_csv(series_path, index=False)

print("\nSaved:")
print(" -", train_path)
print(" -", test_path)
print(" -", summary_path)
print(" -", series_path)


Seed set to 42


Saved series_info_full: D:\Documents\Skripsi\demand-forecasting\data\dataset_15\series_info_full.csv
Total seri: 120 | Eligible: 120
Sisa NA setelah cleaning:
Series([], dtype: int64)
Baris train: 4920
Range time_idx train: 0 -> 40
training_cutoff: 34
Jumlah sample train: 2160
Jumlah sample val  : 2160

=== Random Search Trial 1/10 ===
{'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.38521429192297485, 'attention_head_size': 4, 'learning_rate': 0.002361103900791031}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False


[Trial 1] val_loss=0.1211 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.38521429192297485, 'attention_head_size': 4, 'learning_rate': 0.002361103900791031}

=== Random Search Trial 2/10 ===
{'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.23374982585607737, 'attention_head_size': 4, 'learning_rate': 0.00026844247528777846}


TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 10.7 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 3.4 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 304    | train
8  | static_context_initial_hidden_lstm | 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 2] val_loss=0.1930 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.23374982585607737, 'attention_head_size': 4, 'learning_rate': 0.00026844247528777846}

=== Random Search Trial 3/10 ===
{'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.2803345035229627, 'attention_head_size': 4, 'learning_rate': 0.00015969503345782712}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 3] val_loss=0.1920 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.2803345035229627, 'attention_head_size': 4, 'learning_rate': 0.00015969503345782712}

=== Random Search Trial 4/10 ===
{'hidden_size': 16, 'lstm_layers': 2, 'dropout': 0.34973279224012654, 'attention_head_size': 2, 'learning_rate': 0.00010225842093894156}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 4] val_loss=0.2454 params={'hidden_size': 16, 'lstm_layers': 2, 'dropout': 0.34973279224012654, 'attention_head_size': 2, 'learning_rate': 0.00010225842093894156}

=== Random Search Trial 5/10 ===
{'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.285244452888315, 'attention_head_size': 2, 'learning_rate': 0.00162179365173349}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 5] val_loss=0.0950 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.285244452888315, 'attention_head_size': 2, 'learning_rate': 0.00162179365173349}

=== Random Search Trial 6/10 ===
{'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.1873687420594126, 'attention_head_size': 4, 'learning_rate': 0.001259596817974241}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 6] val_loss=0.0862 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.1873687420594126, 'attention_head_size': 4, 'learning_rate': 0.001259596817974241}

=== Random Search Trial 7/10 ===
{'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.39212665565243776, 'attention_head_size': 4, 'learning_rate': 0.0014226029542294043}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 7] val_loss=0.1025 params={'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.39212665565243776, 'attention_head_size': 4, 'learning_rate': 0.0014226029542294043}

=== Random Search Trial 8/10 ===
{'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 8] val_loss=0.0824 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}

=== Random Search Trial 9/10 ===
{'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.3579821220208962, 'attention_head_size': 4, 'learning_rate': 0.0005945199586931455}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 9] val_loss=0.1866 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.3579821220208962, 'attention_head_size': 4, 'learning_rate': 0.0005945199586931455}

=== Random Search Trial 10/10 ===
{'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.384665661176, 'attention_head_size': 2, 'learning_rate': 0.0024443523095377374}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 10] val_loss=0.0949 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.384665661176, 'attention_head_size': 2, 'learning_rate': 0.0024443523095377374}

===== BEST RANDOM SEARCH PARAMS =====
val_loss: 0.08239301294088364
params  : {'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 26.1 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 6.7 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Checkpoint terbaik: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_15sku_log\tft_15sku_log_final.ckpt


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Train pred shape: (2160, 8)
Test  pred shape: (45, 8)
Unique n_test per seri: [5]

Saved:
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_train_predictions.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_test_predictions.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_metrics_summary.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_metrics_by_series.csv


In [1]:
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import numpy as np
import pandas as pd

import torch
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

import optuna
from optuna.pruners import MedianPruner

# =====================================================
# PATH & CONFIG
# =====================================================
PROJECT_ROOT  = Path(r"D:\Documents\Skripsi\demand-forecasting")
DATASET15_DIR = PROJECT_ROOT / "data" / "dataset_15"
OUT_DIR       = PROJECT_ROOT / "outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

TFT_CHECKPOINT_DIR = PROJECT_ROOT / "tft_checkpoints_15sku_log"
TFT_CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)

EVAL_START = pd.Timestamp("2024-06-01")
EVAL_END   = pd.Timestamp("2024-10-01")

seed_everything(42)

# =====================================================
# 1. LOAD DATASET TFT & BANGUN SERIES_INFO
# =====================================================
df = pd.read_csv(
    DATASET15_DIR / "tft_dataset_15_fullfeat.csv",
    parse_dates=["periode"],
)

df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

# info seri dari train
train_mask = df["is_train"] == 1
info = (
    df.loc[train_mask]
      .groupby(["cabang", "sku"], as_index=False)
      .agg(
          n_train=("qty", "size"),
          zero_ratio_train=("qty", lambda s: (s == 0).mean()),
          total_qty=("qty", "sum"),
          first_train_periode=("periode", "min"),
          last_train_periode=("periode", "max"),
      )
)

info["eligible_model"] = (
    (info["n_train"] >= 30) &
    (info["zero_ratio_train"] <= 0.5) &
    (info["total_qty"] > 0)
).astype(int)

info_path = DATASET15_DIR / "series_info_full.csv"
info.to_csv(info_path, index=False)
print("Saved series_info_full:", info_path)
print("Total seri:", len(info), "| Eligible:", info["eligible_model"].sum())

df = df.merge(
    info[["cabang", "sku", "n_train", "zero_ratio_train", "eligible_model"]],
    on=["cabang", "sku"],
    how="left"
)

df["eligible_model"] = df["eligible_model"].fillna(0).astype(int)

# keep seri eligible
df = df[df["eligible_model"] == 1].copy()
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

# time_idx per seri
df["time_idx"] = df.groupby(["cabang", "sku"]).cumcount()

# =====================================================
# 2. BERSIHKAN NA (LAG & ROLLING), SET SAMPLE_WEIGHT
# =====================================================
rolling_cols = [
    "qty_rollmean_3", "qty_rollstd_3",
    "qty_rollmean_6", "qty_rollstd_6",
    "qty_rollmean_12", "qty_rollstd_12",
]
rolling_cols = [c for c in rolling_cols if c in df.columns]

lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

# rolling: per seri, bfill lalu ffill
if rolling_cols:
    df[rolling_cols] = (
        df.groupby(["cabang", "sku"])[rolling_cols]
          .transform(lambda g: g.bfill().ffill())
    )

# lag qty: NaN -> 0
if lag_cols:
    df[lag_cols] = df[lag_cols].fillna(0)

# lag exog: NaN -> 0
for col in ["event_flag_lag1", "holiday_count_lag1", "rainfall_lag1"]:
    if col in df.columns:
        df[col] = df[col].fillna(0)

# spike_flag + weight moderat
if "spike_flag" not in df.columns:
    df["spike_flag"] = 0

df["sample_weight"] = np.where(df["spike_flag"] == 1, 2.0, 1.0)

# target log1p
df["qty_log"] = np.log1p(df["qty"].clip(lower=0))

na_left = df.isna().sum()
na_left = na_left[na_left > 0]
print("Sisa NA setelah cleaning:")
print(na_left)

# =====================================================
# 3. BANGUN TimeSeriesDataSet (TARGET = qty_log)
# =====================================================
train_df = df[df["is_train"] == 1].copy()

print("Baris train:", len(train_df))
print("Range time_idx train:", train_df["time_idx"].min(), "->", train_df["time_idx"].max())

# 6 bulan terakhir sebagai internal validasi
training_cutoff = train_df["time_idx"].max() - 6
print("training_cutoff:", training_cutoff)

static_cat = ["cabang", "sku"]

known_reals = [
    "time_idx",
    "event_flag", "event_flag_lag1",
    "holiday_count", "holiday_count_lag1",
    "rainfall_lag1",
    "spike_flag",
]
known_reals = [c for c in known_reals if c in df.columns]

unknown_reals = rolling_cols + lag_cols   # qty_log TIDAK dimasukkan sebagai covariate

max_prediction_length = 1
min_encoder_length = 12
max_encoder_length = 24

training_ds = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="qty_log",
    group_ids=["cabang", "sku"],
    weight="sample_weight",

    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,

    static_categoricals=static_cat,
    static_reals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=known_reals,
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=unknown_reals,

    target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,

    # pisah train/val internal
    min_prediction_idx=training_cutoff + 1,
)

batch_size = 64

train_loader = training_ds.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_loader = training_ds.to_dataloader(
    train=False,
    batch_size=batch_size,
    num_workers=0,
)

print("Jumlah sample train:", len(train_loader.dataset))
print("Jumlah sample val  :", len(val_loader.dataset))

# =====================================================
# 4. RANDOM SEARCH PARAMETER (QuantileLoss q=0.5)
# =====================================================
def sample_params():
    return {
        "hidden_size": int(np.random.choice([8, 16, 24, 32])),
        "lstm_layers": int(np.random.choice([1, 2])),
        "dropout": float(np.random.uniform(0.1, 0.4)),
        "attention_head_size": int(np.random.choice([1, 2, 4])),
        "learning_rate": float(np.random.uniform(1e-4, 3e-3)),
    }

N_RS = 10
results = []
loss_q = QuantileLoss(quantiles=[0.5])

for i in range(N_RS):
    params = sample_params()
    print(f"\n=== Random Search Trial {i+1}/{N_RS} ===")
    print(params)

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=int(params["hidden_size"]),
        lstm_layers=int(params["lstm_layers"]),
        dropout=float(params["dropout"]),
        attention_head_size=int(params["attention_head_size"]),
        learning_rate=float(params["learning_rate"]),
        loss=loss_q,
        output_size=1,
        log_interval=10,
        log_val_interval=1,
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        enable_progress_bar=True,
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=3, mode="min"),
        ],
    )

    trainer.fit(model, train_loader, val_loader)

    val_loss = float(trainer.callback_metrics["val_loss"].item())
    results.append((val_loss, params))
    print(f"[Trial {i+1}] val_loss={val_loss:.4f} params={params}")

best_rs_loss, best_rs_params = sorted(results, key=lambda x: x[0])[0]
print("\n===== BEST RANDOM SEARCH PARAMS =====")
print("RS val_loss:", best_rs_loss)
print("RS params  :", best_rs_params)

# =====================================================
# 5. BAYESIAN OPTIMIZATION (OPTUNA)
# =====================================================
def objective(trial: optuna.Trial) -> float:
    # range agak luas tapi masuk akal
    hidden_size = trial.suggest_int("hidden_size", 8, 64, step=8)
    lstm_layers = trial.suggest_int("lstm_layers", 1, 3)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    attention_head_size = trial.suggest_categorical("attention_head_size", [1, 2, 4])
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 3e-3, log=True)

    params = {
        "hidden_size": int(hidden_size),
        "lstm_layers": int(lstm_layers),
        "dropout": float(dropout),
        "attention_head_size": int(attention_head_size),
        "learning_rate": float(learning_rate),
    }

    print(f"\n[Optuna] Trial {trial.number} params={params}")

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=params["hidden_size"],
        lstm_layers=params["lstm_layers"],
        dropout=params["dropout"],
        attention_head_size=params["attention_head_size"],
        learning_rate=params["learning_rate"],
        loss=loss_q,
        output_size=1,
        log_interval=10,
        log_val_interval=1,
    )

    trainer = Trainer(
        max_epochs=25,
        accelerator="cpu",
        enable_progress_bar=False,
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=4, mode="min"),
        ],
    )

    trainer.fit(model, train_loader, val_loader)

    val_loss = float(trainer.callback_metrics["val_loss"].item())
    print(f"[Optuna] Trial {trial.number} val_loss={val_loss:.4f}")
    return val_loss

study = optuna.create_study(
    direction="minimize",
    pruner=MedianPruner(n_warmup_steps=3),
)

# optional: info RS sebagai baseline di log, tapi Optuna tetap running sendiri
study.optimize(objective, n_trials=20, show_progress_bar=False)

best_bo_params = study.best_trial.params
best_bo_loss = study.best_value

print("\n===== BEST OPTUNA PARAMS =====")
print("BO val_loss:", best_bo_loss)
print("BO params  :", best_bo_params)

# =====================================================
# 6. TRAIN FINAL MODEL DENGAN PARAMETER OPTUNA
# =====================================================
final_params = {
    "hidden_size": int(best_bo_params["hidden_size"]),
    "lstm_layers": int(best_bo_params["lstm_layers"]),
    "dropout": float(best_bo_params["dropout"]),
    "attention_head_size": int(best_bo_params["attention_head_size"]),
    "learning_rate": float(best_bo_params["learning_rate"]),
}

print("\nFINAL PARAMS (Optuna):", final_params)

final_model = TemporalFusionTransformer.from_dataset(
    training_ds,
    hidden_size=final_params["hidden_size"],
    lstm_layers=final_params["lstm_layers"],
    dropout=final_params["dropout"],
    attention_head_size=final_params["attention_head_size"],
    learning_rate=final_params["learning_rate"],
    loss=loss_q,
    output_size=1,
)

checkpoint_cb = ModelCheckpoint(
    dirpath=TFT_CHECKPOINT_DIR,
    filename="tft_15sku_log_final",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

early_cb = EarlyStopping(
    monitor="val_loss",
    patience=8,
    mode="min",
)

final_trainer = Trainer(
    max_epochs=50,
    accelerator="cpu",
    enable_progress_bar=True,
    callbacks=[early_cb, checkpoint_cb],
)

final_trainer.fit(final_model, train_loader, val_loader)

print("Checkpoint terbaik:", checkpoint_cb.best_model_path)

# =====================================================
# 7. PREDIKSI TRAIN & FULL (BALIKKAN LOG -> LEVEL)
# =====================================================
def inv_log(x):
    return np.expm1(x)

# TRAIN
train_raw = final_model.predict(training_ds, return_index=True)
train_pred_log = train_raw.output.squeeze(-1)
train_idx = train_raw.index

df_train_pred = pd.DataFrame({
    "cabang":   train_idx["cabang"],
    "sku":      train_idx["sku"],
    "time_idx": train_idx["time_idx"],
    "qty_pred_log": train_pred_log,
})
df_train_pred["qty_pred"] = inv_log(df_train_pred["qty_pred_log"])

df_train_pred = df_train_pred.merge(
    df[["cabang", "sku", "time_idx", "qty", "periode", "is_train"]],
    on=["cabang", "sku", "time_idx"],
    how="left",
)

df_train_pred = df_train_pred[df_train_pred["is_train"] == 1].copy()

# FULL (train + val + test)
full_ds = TimeSeriesDataSet.from_dataset(
    training_ds,
    df,
    stop_randomization=True,
)

full_raw = final_model.predict(full_ds, return_index=True)
full_pred_log = full_raw.output.squeeze(-1)
full_idx = full_raw.index

df_pred_all = pd.DataFrame({
    "cabang":   full_idx["cabang"],
    "sku":      full_idx["sku"],
    "time_idx": full_idx["time_idx"],
    "qty_pred_log": full_pred_log,
})
df_pred_all["qty_pred"] = inv_log(df_pred_all["qty_pred_log"])

df_pred_all = df_pred_all.merge(
    df[["cabang", "sku", "time_idx", "qty", "periode", "is_test"]],
    on=["cabang", "sku", "time_idx"],
    how="left",
)

# collapse duplikat window
df_pred_all = (
    df_pred_all
    .groupby(["cabang", "sku", "time_idx", "periode"], as_index=False)
    .agg(
        qty_pred_log=("qty_pred_log", "mean"),
        qty_pred=("qty_pred", "mean"),
        qty=("qty", "first"),
        is_test=("is_test", "max"),
    )
)

# FILTER 5 BULAN TEST
df_test_pred = df_pred_all[
    (df_pred_all["is_test"] == 1) &
    (df_pred_all["periode"] >= EVAL_START) &
    (df_pred_all["periode"] <= EVAL_END)
].copy()

print("Train pred shape:", df_train_pred.shape)
print("Test  pred shape:", df_test_pred.shape)
print("Unique n_test per seri:",
      df_test_pred.groupby(["cabang", "sku"])["periode"].nunique().unique())

# =====================================================
# 8. HITUNG METRIK
# =====================================================
def calc_metrics(dfm):
    dfm = dfm.dropna(subset=["qty", "qty_pred"]).copy()
    err = dfm["qty_pred"] - dfm["qty"]

    mse  = float(np.mean(err**2))
    rmse = float(np.sqrt(mse))
    mae  = float(np.mean(np.abs(err)))

    nonzero = dfm["qty"] != 0
    if nonzero.sum() > 0:
        mape = float(
            np.mean(
                np.abs(dfm.loc[nonzero, "qty_pred"] - dfm.loc[nonzero, "qty"])
                / dfm.loc[nonzero, "qty"]
            ) * 100
        )
    else:
        mape = np.nan

    smape = float(
        np.mean(
            2 * np.abs(dfm["qty_pred"] - dfm["qty"])
            / (np.abs(dfm["qty_pred"]) + np.abs(dfm["qty"]) + 1e-9)
        ) * 100
    )

    return rmse, mae, mape, smape, mse

rmse_tr, mae_tr, mape_tr, smape_tr, mse_tr = calc_metrics(df_train_pred)
rmse_te, mae_te, mape_te, smape_te, mse_te = calc_metrics(df_test_pred)

summary = pd.DataFrame([{
    "RMSE_train": rmse_tr,
    "MAE_train": mae_tr,
    "MAPE%_train": mape_tr,
    "sMAPE%_train": smape_tr,
    "MSE_train": mse_tr,
    "RMSE_test": rmse_te,
    "MAE_test": mae_te,
    "MAPE%_test": mape_te,
    "sMAPE%_test": smape_te,
    "MSE_test": mse_te,
}])

rows = []
for (cab, sku), g in df_test_pred.groupby(["cabang", "sku"], sort=False):
    rmse, mae, mape, smape, mse = calc_metrics(g)
    rows.append({
        "cabang": cab,
        "sku": sku,
        "n_test": len(g),
        "MSE_test": mse,
        "RMSE_test": rmse,
        "MAE_test": mae,
        "MAPE%_test": mape,
        "sMAPE%_test": smape,
    })

metrics_by_series = pd.DataFrame(rows)

# =====================================================
# 9. SAVE OUTPUT
# =====================================================
train_path   = OUT_DIR / "tft_15sku_log_train_predictions_optuna.csv"
test_path    = OUT_DIR / "tft_15sku_log_test_predictions_optuna.csv"
summary_path = OUT_DIR / "tft_15sku_log_metrics_summary_optuna.csv"
series_path  = OUT_DIR / "tft_15sku_log_metrics_by_series_optuna.csv"

df_train_pred.to_csv(train_path, index=False)
df_test_pred.to_csv(test_path, index=False)
summary.to_csv(summary_path, index=False)
metrics_by_series.to_csv(series_path, index=False)

print("\nSaved:")
print(" -", train_path)
print(" -", test_path)
print(" -", summary_path)
print(" -", series_path)


Seed set to 42


Saved series_info_full: D:\Documents\Skripsi\demand-forecasting\data\dataset_15\series_info_full.csv
Total seri: 120 | Eligible: 120
Sisa NA setelah cleaning:
Series([], dtype: int64)
Baris train: 4920
Range time_idx train: 0 -> 40
training_cutoff: 34
Jumlah sample train: 2160
Jumlah sample val  : 2160

=== Random Search Trial 1/10 ===
{'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.38521429192297485, 'attention_head_size': 4, 'learning_rate': 0.002361103900791031}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 1] val_loss=0.1171 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.38521429192297485, 'attention_head_size': 4, 'learning_rate': 0.002361103900791031}

=== Random Search Trial 2/10 ===
{'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.23374982585607737, 'attention_head_size': 4, 'learning_rate': 0.00026844247528777846}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 2] val_loss=0.1945 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.23374982585607737, 'attention_head_size': 4, 'learning_rate': 0.00026844247528777846}

=== Random Search Trial 3/10 ===
{'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.2803345035229627, 'attention_head_size': 4, 'learning_rate': 0.00015969503345782712}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 3] val_loss=0.1910 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.2803345035229627, 'attention_head_size': 4, 'learning_rate': 0.00015969503345782712}

=== Random Search Trial 4/10 ===
{'hidden_size': 16, 'lstm_layers': 2, 'dropout': 0.34973279224012654, 'attention_head_size': 2, 'learning_rate': 0.00010225842093894156}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 4] val_loss=0.2482 params={'hidden_size': 16, 'lstm_layers': 2, 'dropout': 0.34973279224012654, 'attention_head_size': 2, 'learning_rate': 0.00010225842093894156}

=== Random Search Trial 5/10 ===
{'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.285244452888315, 'attention_head_size': 2, 'learning_rate': 0.00162179365173349}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 5] val_loss=0.0928 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.285244452888315, 'attention_head_size': 2, 'learning_rate': 0.00162179365173349}

=== Random Search Trial 6/10 ===
{'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.1873687420594126, 'attention_head_size': 4, 'learning_rate': 0.001259596817974241}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 6] val_loss=0.0956 params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.1873687420594126, 'attention_head_size': 4, 'learning_rate': 0.001259596817974241}

=== Random Search Trial 7/10 ===
{'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.39212665565243776, 'attention_head_size': 4, 'learning_rate': 0.0014226029542294043}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 7] val_loss=0.1108 params={'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.39212665565243776, 'attention_head_size': 4, 'learning_rate': 0.0014226029542294043}

=== Random Search Trial 8/10 ===
{'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 8] val_loss=0.0898 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}

=== Random Search Trial 9/10 ===
{'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.3579821220208962, 'attention_head_size': 4, 'learning_rate': 0.0005945199586931455}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


[Trial 9] val_loss=0.1910 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.3579821220208962, 'attention_head_size': 4, 'learning_rate': 0.0005945199586931455}

=== Random Search Trial 10/10 ===
{'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.384665661176, 'attention_head_size': 2, 'learning_rate': 0.0024443523095377374}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
[I 2025-11-18 16:38:16,038] A new study created in memory with name: no-name-86b65c6e-31c5-4188-bd49-25304c501a45


[Trial 10] val_loss=0.0966 params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.384665661176, 'attention_head_size': 2, 'learning_rate': 0.0024443523095377374}

===== BEST RANDOM SEARCH PARAMS =====
RS val_loss: 0.08978880196809769
RS params  : {'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}

[Optuna] Trial 0 params={'hidden_size': 40, 'lstm_layers': 2, 'dropout': 0.12552996360962543, 'attention_head_size': 4, 'learning_rate': 0.00019900504119860785}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.8 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 0 val_loss=0.1232

[Optuna] Trial 1 params={'hidden_size': 8, 'lstm_layers': 3, 'dropout': 0.1707196905356707, 'attention_head_size': 1, 'learning_rate': 0.0006631274303772053}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 1 val_loss=0.1404

[Optuna] Trial 2 params={'hidden_size': 64, 'lstm_layers': 3, 'dropout': 0.4497027531881359, 'attention_head_size': 2, 'learning_rate': 0.00018225689921173437}



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 5.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 51.8 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 14.3 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.8 K 

[Optuna] Trial 2 val_loss=0.1683

[Optuna] Trial 3 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.49988549491764667, 'attention_head_size': 4, 'learning_rate': 0.00021204930801766749}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 3 val_loss=0.2207

[Optuna] Trial 4 params={'hidden_size': 64, 'lstm_layers': 3, 'dropout': 0.194924034840796, 'attention_head_size': 1, 'learning_rate': 0.00277249491362397}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 5.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 4 val_loss=0.1181

[Optuna] Trial 5 params={'hidden_size': 64, 'lstm_layers': 1, 'dropout': 0.3182222533363749, 'attention_head_size': 1, 'learning_rate': 0.0003085674365869846}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 5.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 51.8 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 14.3 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K | train
8  | sta

[Optuna] Trial 5 val_loss=0.1317

[Optuna] Trial 6 params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.28447862956808145, 'attention_head_size': 1, 'learning_rate': 0.0021844208305161535}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 437    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 6 val_loss=0.1100

[Optuna] Trial 7 params={'hidden_size': 24, 'lstm_layers': 3, 'dropout': 0.3754112555901278, 'attention_head_size': 1, 'learning_rate': 0.00032035543135959743}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 26.1 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 6.7 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

[Optuna] Trial 7 val_loss=0.1685

[Optuna] Trial 8 params={'hidden_size': 24, 'lstm_layers': 3, 'dropout': 0.35121834286005826, 'attention_head_size': 2, 'learning_rate': 0.0014082381466887868}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 8 val_loss=0.1118

[Optuna] Trial 9 params={'hidden_size': 40, 'lstm_layers': 2, 'dropout': 0.2672487679269835, 'attention_head_size': 2, 'learning_rate': 0.001130773772785465}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.8 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 9 val_loss=0.0805

[Optuna] Trial 10 params={'hidden_size': 48, 'lstm_layers': 2, 'dropout': 0.2496340765485474, 'attention_head_size': 2, 'learning_rate': 0.0008487099576868733}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 4.4 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 10 val_loss=0.0979

[Optuna] Trial 11 params={'hidden_size': 48, 'lstm_layers': 2, 'dropout': 0.2513171982761636, 'attention_head_size': 2, 'learning_rate': 0.0008280616136758653}


`Trainer.fit` stopped: `max_epochs=25` reached.
[I 2025-11-18 18:39:07,939] Trial 11 finished with value: 0.090152807533741 and parameters: {'hidden_size': 48, 'lstm_layers': 2, 'dropout': 0.2513171982761636, 'attention_head_size': 2, 'learning_rate': 0.0008280616136758653}. Best is trial 9 with value: 0.08047028630971909.


[Optuna] Trial 11 val_loss=0.0902

[Optuna] Trial 12 params={'hidden_size': 48, 'lstm_layers': 2, 'dropout': 0.2390691171029722, 'attention_head_size': 2, 'learning_rate': 0.0012064045771810854}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 4.4 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 12 val_loss=0.0966

[Optuna] Trial 13 params={'hidden_size': 48, 'lstm_layers': 2, 'dropout': 0.21899215734958072, 'attention_head_size': 2, 'learning_rate': 0.0001016435219533597}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 4.4 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 13 val_loss=0.1591

[Optuna] Trial 14 params={'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.1037786633575872, 'attention_head_size': 2, 'learning_rate': 0.0004739206010771206}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 14 val_loss=0.1016

[Optuna] Trial 15 params={'hidden_size': 40, 'lstm_layers': 2, 'dropout': 0.304190201247747, 'attention_head_size': 2, 'learning_rate': 0.0014072751282735718}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.8 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 15 val_loss=0.0920

[Optuna] Trial 16 params={'hidden_size': 56, 'lstm_layers': 1, 'dropout': 0.40487346791686546, 'attention_head_size': 2, 'learning_rate': 0.0009832875313949656}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 5.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 16 val_loss=0.1184

[Optuna] Trial 17 params={'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.15603046937572784, 'attention_head_size': 4, 'learning_rate': 0.0005337428067181717}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 17 val_loss=0.0952

[Optuna] Trial 18 params={'hidden_size': 56, 'lstm_layers': 2, 'dropout': 0.27579774775968025, 'attention_head_size': 2, 'learning_rate': 0.0016150395713785584}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 5.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 18 val_loss=0.0967

[Optuna] Trial 19 params={'hidden_size': 24, 'lstm_layers': 1, 'dropout': 0.32472620722267587, 'attention_head_size': 2, 'learning_rate': 0.0009032528874852721}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Optuna] Trial 19 val_loss=0.1156

===== BEST OPTUNA PARAMS =====
BO val_loss: 0.08047028630971909
BO params  : {'hidden_size': 40, 'lstm_layers': 2, 'dropout': 0.2672487679269835, 'attention_head_size': 2, 'learning_rate': 0.001130773772785465}

FINAL PARAMS (Optuna): {'hidden_size': 40, 'lstm_layers': 2, 'dropout': 0.2672487679269835, 'attention_head_size': 2, 'learning_rate': 0.001130773772785465}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 731    | train
3  | prescalers                         | ModuleDict                      | 464    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.8 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 36.8 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 9.7 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 6.6 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Checkpoint terbaik: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_15sku_log\tft_15sku_log_final-v2.ckpt


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Train pred shape: (2160, 8)
Test  pred shape: (45, 8)
Unique n_test per seri: [5]

Saved:
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_train_predictions_optuna.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_test_predictions_optuna.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_metrics_summary_optuna.csv
 - D:\Documents\Skripsi\demand-forecasting\outputs\tft_15sku_log_metrics_by_series_optuna.csv


Pipeline ini:

Cek dan profil dulu datanya

Cluster SKU berdasarkan karakter historis

Beri rekomendasi cluster (summary)

Latih TFT global per cluster, tetap pakai Random Search + Bayesian Optimization

In [None]:
#cluster

import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


# =====================================================
# PATH CONFIG
# =====================================================
PROJECT_ROOT = Path(r"D:\Documents\Skripsi\demand-forecasting")
DATA_PATH = PROJECT_ROOT / "data" / "dataset_15" / "tft_dataset_15_fullfeat.csv"

OUT_DIR = PROJECT_ROOT / "outputs" / "tft_clustered"
OUT_DIR.mkdir(parents=True, exist_ok=True)


# =====================================================
# LOAD DATA
# =====================================================
df = pd.read_csv(DATA_PATH, parse_dates=["periode"])
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

required_cols = ["cabang", "sku", "periode", "qty", "is_train"]
for c in required_cols:
    if c not in df.columns:
        raise ValueError(f"Missing required column: {c}")

if "spike_flag" not in df.columns:
    df["spike_flag"] = 0


# =====================================================
# SKU PROFILING (TRAIN ONLY)
# =====================================================
train_mask = df["is_train"] == 1
df_train = df[train_mask].copy()

def zero_ratio(x):
    return (np.asarray(x, float) == 0).mean()

def safe_skew(x):
    x = np.asarray(x, float)
    if len(x) < 3:
        return 0.0
    return pd.Series(x).skew()

def spike_count(qty_series, spike_flag):
    if spike_flag is not None:
        return int(spike_flag.sum())
    q95 = np.quantile(qty_series, 0.95)
    return int((qty_series > q95).sum())

df_train["_qty"] = df_train["qty"]
df_train["_spike_flag"] = df_train["spike_flag"]

profiles = (
    df_train
    .groupby(["cabang", "sku"], as_index=False)
    .apply(
        lambda g: pd.Series({
            "n_train": g["_qty"].size,
            "zero_ratio": zero_ratio(g["_qty"]),
            "mean_qty": g["_qty"].mean(),
            "std_qty": g["_qty"].std(ddof=1) if g["_qty"].size > 1 else 0.0,
            "max_qty": g["_qty"].max(),
            "p95_qty": np.quantile(g["_qty"], 0.95),
            "skew_qty": safe_skew(g["_qty"]),
            "spike_count": spike_count(g["_qty"], g["_spike_flag"]),
        })
    )
    .reset_index(drop=True)
)

profiles["volatility"] = profiles["std_qty"] / (profiles["mean_qty"] + 1e-9)
profiles["eligible_base"] = (
    (profiles["n_train"] >= 30) &
    (profiles["mean_qty"] > 0)
).astype(int)

profiles_raw_path = OUT_DIR / "sku_profiles_raw.csv"
profiles.to_csv(profiles_raw_path, index=False)


# =====================================================
# FILTER ELIGIBLE SKU
# =====================================================
profiles_elig = profiles[profiles["eligible_base"] == 1].copy()
if len(profiles_elig) < 2:
    raise ValueError("Not enough eligible series for clustering.")


# =====================================================
# CLUSTERING (AUTO K = 2-5)
# =====================================================
feature_cols = [
    "mean_qty",
    "std_qty",
    "zero_ratio",
    "volatility",
    "max_qty",
    "p95_qty",
    "skew_qty",
    "spike_count",
]

X = profiles_elig[feature_cols].fillna(0).values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

possible_k = [2, 3, 4, 5]
best_k = None
best_score = -1
best_kmeans = None

for k in possible_k:
    if len(profiles_elig) <= k:
        continue

    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = kmeans.fit_predict(X_scaled)

    if len(np.unique(labels)) < 2:
        continue

    score = silhouette_score(X_scaled, labels)
    print(f"K={k}, silhouette={score:.4f}")

    if score > best_score:
        best_score = score
        best_k = k
        best_kmeans = kmeans

if best_k is None:
    raise ValueError("Failed to find a valid K for clustering.")

profiles_elig["cluster_id"] = best_kmeans.predict(X_scaled)


# =====================================================
# MERGE CLUSTER_ID KE PROFILES
# =====================================================
profiles = profiles.merge(
    profiles_elig[["cabang", "sku", "cluster_id"]],
    on=["cabang", "sku"],
    how="left"
)

profiles_clustered_path = OUT_DIR / "sku_profiles_clustered.csv"
profiles.to_csv(profiles_clustered_path, index=False)


# =====================================================
# CLUSTER SUMMARY
# =====================================================
summary_rows = []
for cid, g in profiles_elig.groupby("cluster_id", sort=True):
    summary_rows.append({
        "cluster_id": cid,
        "n_sku": len(g),
        "mean_mean_qty": g["mean_qty"].mean(),
        "mean_zero_ratio": g["zero_ratio"].mean(),
        "mean_volatility": g["volatility"].mean(),
        "mean_spike_count": g["spike_count"].mean(),
    })

cluster_summary = pd.DataFrame(summary_rows).sort_values("cluster_id")

cluster_summary_path = OUT_DIR / "cluster_summary.csv"
cluster_summary.to_csv(cluster_summary_path, index=False)

print("\nCluster Summary:")
print(cluster_summary)


# =====================================================
# MERGE CLUSTER_ID KE PANEL & PREPROCESS
# =====================================================
df = df.merge(
    profiles[["cabang", "sku", "cluster_id"]],
    on=["cabang", "sku"],
    how="left"
)

df = df[df["cluster_id"].notna()].copy()

df["cabang"] = df["cabang"].astype(str)
df["sku"] = df["sku"].astype(str)
df["cluster_id"] = df["cluster_id"].astype(int).astype(str)


# Rolling & lag
rolling_cols = [c for c in df.columns if "roll" in c]
lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

if rolling_cols:
    df[rolling_cols] = df.groupby(["cabang", "sku"])[rolling_cols].transform(
        lambda g: g.bfill().ffill()
    )

if lag_cols:
    df[lag_cols] = df[lag_cols].fillna(0)

for col in ["event_flag_lag1", "holiday_count_lag1", "rainfall_lag1"]:
    if col in df.columns:
        df[col] = df[col].fillna(0)

df["sample_weight"] = np.where(df["spike_flag"] == 1, 2.0, 1.0)
df["qty_log"] = np.log1p(df["qty"].clip(lower=0))

df["month"] = df["periode"].dt.month
df["year"] = df["periode"].dt.year
df["time_idx"] = df.groupby(["cabang", "sku"]).cumcount()


# =====================================================
# SAVE PANEL READY FOR TFT
# =====================================================
panel_ready_path = OUT_DIR / "panel_ready_tft.csv"
df.to_csv(panel_ready_path, index=False)

print("\nSaved panel_ready_tft.csv:", panel_ready_path)


K=2, silhouette=0.5172
K=3, silhouette=0.4141
K=4, silhouette=0.4302
K=5, silhouette=0.4404

Cluster Summary:
   cluster_id  n_sku  mean_mean_qty  mean_zero_ratio  mean_volatility  \
0           0     24    3713.390244              0.0         0.839328   
1           1     96    1460.329522              0.0         0.448534   

   mean_spike_count  
0          3.833333  
1          1.447917  

Saved panel_ready_tft.csv: D:\Documents\Skripsi\demand-forecasting\outputs\tft_clustered\panel_ready_tft.csv


In [4]:
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import json

import numpy as np
import pandas as pd

import torch
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

import optuna
from optuna.pruners import MedianPruner


# =====================================================
# PATH & CONFIG
# =====================================================
PROJECT_ROOT = Path(r"D:\Documents\Skripsi\demand-forecasting")

CLUSTER_OUT_DIR = PROJECT_ROOT / "outputs" / "tft_clustered"
PANEL_PATH = CLUSTER_OUT_DIR / "panel_ready_tft.csv"

GLOBAL_OUT_DIR = PROJECT_ROOT / "outputs" / "tft_global"
GLOBAL_OUT_DIR.mkdir(parents=True, exist_ok=True)

CHECKPOINT_DIR = PROJECT_ROOT / "tft_checkpoints_global"
CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)

seed_everything(42)

# budget tuning global
N_RS = 10          # random search trials
N_TRIALS_BO = 20   # optuna trials


# =====================================================
# LOAD PANEL READY
# =====================================================
df = pd.read_csv(PANEL_PATH, parse_dates=["periode"])
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

required_cols = [
    "area", "cabang", "sku", "periode",
    "qty", "qty_log", "is_train",
    "time_idx", "month", "year",
    "cluster_id",
]
for c in required_cols:
    if c not in df.columns:
        raise ValueError(f"Missing required column: {c}")

# pastikan tipe kategori string
df["area"] = df["area"].astype(str)
df["cabang"] = df["cabang"].astype(str)
df["sku"] = df["sku"].astype(str)
df["cluster_id"] = df["cluster_id"].astype(str)

if "spike_flag" not in df.columns:
    df["spike_flag"] = 0
if "sample_weight" not in df.columns:
    df["sample_weight"] = 1.0

# subset train saja untuk building TimeSeriesDataSet
df_train = df[df["is_train"] == 1].copy()
if df_train.empty:
    raise ValueError("No train data (is_train == 1) found.")

print("Rows train:", len(df_train))
print("Time_idx range train:", df_train["time_idx"].min(), "->", df_train["time_idx"].max())


# =====================================================
# CONFIG FEATURE LISTS
# =====================================================
rolling_cols = [c for c in df.columns if "roll" in c]
lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

static_categoricals = ["area", "cabang", "sku", "cluster_id"]

known_reals = [
    "time_idx",
    "month",
    "year",
    "event_flag",
    "event_flag_lag1",
    "holiday_count",
    "holiday_count_lag1",
    "rainfall_lag1",
    "spike_flag",
]
known_reals = [c for c in known_reals if c in df.columns]

unknown_reals = rolling_cols + lag_cols

# cutoff validasi internal (6 bulan terakhir di train)
training_cutoff = df_train["time_idx"].max() - 6
print("training_cutoff:", training_cutoff)


# =====================================================
# BUILD TimeSeriesDataSet GLOBAL
# =====================================================
training_ds = TimeSeriesDataSet(
    df_train,
    time_idx="time_idx",
    target="qty_log",
    group_ids=["cabang", "sku"],
    weight="sample_weight",

    min_encoder_length=12,
    max_encoder_length=24,
    min_prediction_length=1,
    max_prediction_length=1,

    static_categoricals=static_categoricals,
    time_varying_known_reals=known_reals,
    time_varying_unknown_reals=unknown_reals,

    target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,

    min_prediction_idx=training_cutoff + 1,
)

train_loader = training_ds.to_dataloader(
    train=True,
    batch_size=64,
    num_workers=0,
)
val_loader = training_ds.to_dataloader(
    train=False,
    batch_size=64,
    num_workers=0,
)

print("Num samples (train ds):", len(train_loader.dataset))
print("Num samples (val ds)  :", len(val_loader.dataset))


# =====================================================
# RANDOM SEARCH + BAYESIAN OPTIMIZATION
# =====================================================
loss_q = QuantileLoss(quantiles=[0.5])


def sample_params():
    return {
        "hidden_size": int(np.random.choice([8, 16, 24, 32, 48])),
        "lstm_layers": int(np.random.choice([1, 2, 3])),
        "dropout": float(np.random.uniform(0.1, 0.4)),
        "attention_head_size": int(np.random.choice([1, 2, 4])),
        "learning_rate": float(np.random.uniform(1e-4, 3e-3)),
    }


# ---------------- RANDOM SEARCH ----------------
rs_results = []
print(f"\n=== GLOBAL TFT: Random Search ({N_RS} trials) ===")
for i in range(N_RS):
    params = sample_params()
    print(f"[RS {i+1}/{N_RS}] params={params}")

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=params["hidden_size"],
        lstm_layers=params["lstm_layers"],
        dropout=params["dropout"],
        attention_head_size=params["attention_head_size"],
        learning_rate=params["learning_rate"],
        loss=loss_q,
        output_size=1,
        log_interval=10,
        log_val_interval=1,
    )

    trainer = Trainer(
        max_epochs=15,
        accelerator="cpu",
        enable_progress_bar=False,
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=3, mode="min"),
        ],
    )
    trainer.fit(model, train_loader, val_loader)

    val_loss = float(trainer.callback_metrics["val_loss"].item())
    rs_results.append((val_loss, params))
    print(f"[RS {i+1}] val_loss={val_loss:.4f}")

best_rs_loss, best_rs_params = sorted(rs_results, key=lambda x: x[0])[0]
print("\nBest RS loss:", best_rs_loss)
print("Best RS params:", best_rs_params)


# ---------------- OPTUNA (BO) ----------------
def objective(trial: optuna.Trial) -> float:
    hidden_size = trial.suggest_int("hidden_size", 8, 64, step=8)
    lstm_layers = trial.suggest_int("lstm_layers", 1, 3)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    attention_head_size = trial.suggest_categorical("attention_head_size", [1, 2, 4])
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 3e-3, log=True)

    params = {
        "hidden_size": int(hidden_size),
        "lstm_layers": int(lstm_layers),
        "dropout": float(dropout),
        "attention_head_size": int(attention_head_size),
        "learning_rate": float(learning_rate),
    }

    model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=params["hidden_size"],
        lstm_layers=params["lstm_layers"],
        dropout=params["dropout"],
        attention_head_size=params["attention_head_size"],
        learning_rate=params["learning_rate"],
        loss=loss_q,
        output_size=1,
        log_interval=10,
        log_val_interval=1,
    )

    trainer = Trainer(
        max_epochs=20,
        accelerator="cpu",
        enable_progress_bar=False,
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=4, mode="min"),
        ],
    )
    trainer.fit(model, train_loader, val_loader)
    val_loss = float(trainer.callback_metrics["val_loss"].item())
    return val_loss


print(f"\n=== GLOBAL TFT: Bayesian Optimization (Optuna, {N_TRIALS_BO} trials) ===")
study = optuna.create_study(
    direction="minimize",
    pruner=MedianPruner(n_warmup_steps=3),
)
study.optimize(objective, n_trials=N_TRIALS_BO, show_progress_bar=False)

best_bo_params = study.best_trial.params
best_bo_loss = study.best_value

print("\nBest BO loss:", best_bo_loss)
print("Best BO params:", best_bo_params)


# =====================================================
# TRAIN FINAL GLOBAL MODEL
# =====================================================
final_params = {
    "hidden_size": int(best_bo_params["hidden_size"]),
    "lstm_layers": int(best_bo_params["lstm_layers"]),
    "dropout": float(best_bo_params["dropout"]),
    "attention_head_size": int(best_bo_params["attention_head_size"]),
    "learning_rate": float(best_bo_params["learning_rate"]),
}

print("\nFINAL GLOBAL PARAMS:", final_params)

logger = CSVLogger(save_dir=str(GLOBAL_OUT_DIR), name="tft_global")

checkpoint_cb = ModelCheckpoint(
    dirpath=CHECKPOINT_DIR,
    filename="tft_global_best",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

early_cb = EarlyStopping(
    monitor="val_loss",
    patience=8,
    mode="min",
)

final_model = TemporalFusionTransformer.from_dataset(
    training_ds,
    hidden_size=final_params["hidden_size"],
    lstm_layers=final_params["lstm_layers"],
    dropout=final_params["dropout"],
    attention_head_size=final_params["attention_head_size"],
    learning_rate=final_params["learning_rate"],
    loss=loss_q,
    output_size=1,
    log_interval=10,
    log_val_interval=1,
)

trainer = Trainer(
    max_epochs=40,
    accelerator="cpu",
    logger=logger,
    callbacks=[checkpoint_cb, early_cb],
    enable_progress_bar=True,
)

trainer.fit(final_model, train_loader, val_loader)

print("\nBest checkpoint path:", checkpoint_cb.best_model_path)

# simpan info model global
info = {
    "best_rs_loss": best_rs_loss,
    "best_rs_params": best_rs_params,
    "best_bo_loss": best_bo_loss,
    "best_bo_params": best_bo_params,
    "final_params": final_params,
    "best_checkpoint": checkpoint_cb.best_model_path,
}

info_path = GLOBAL_OUT_DIR / "tft_global_info.json"
with open(info_path, "w") as f:
    json.dump(info, f, indent=2)

print("Saved global TFT info to:", info_path)


Seed set to 42


Rows train: 4920
Time_idx range train: 0 -> 40
training_cutoff: 34
Num samples (train ds): 2160
Num samples (val ds)  : 2160

=== GLOBAL TFT: Random Search (10 trials) ===
[RS 1/10] params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.15503043695984914, 'attention_head_size': 1, 'learning_rate': 0.0018308654580448125}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 1] val_loss=0.0891
[RS 2/10] params={'hidden_size': 16, 'lstm_layers': 3, 'dropout': 0.12999247474540088, 'attention_head_size': 4, 'learning_rate': 0.002611910822747312}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 2] val_loss=0.1137
[RS 3/10] params={'hidden_size': 32, 'lstm_layers': 3, 'dropout': 0.10617534828874074, 'attention_head_size': 2, 'learning_rate': 0.002193796439573792}



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 34.9 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 10.5 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 4.3 K  

[RS 3] val_loss=0.0959
[RS 4/10] params={'hidden_size': 16, 'lstm_layers': 1, 'dropout': 0.285244452888315, 'attention_head_size': 2, 'learning_rate': 0.00162179365173349}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 4] val_loss=0.1299
[RS 5/10] params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.1873687420594126, 'attention_head_size': 4, 'learning_rate': 0.001259596817974241}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 5] val_loss=0.1028
[RS 6/10] params={'hidden_size': 32, 'lstm_layers': 3, 'dropout': 0.2368209952651108, 'attention_head_size': 4, 'learning_rate': 0.0018933194270659534}


`Trainer.fit` stopped: `max_epochs=15` reached.


[RS 6] val_loss=0.0961
[RS 7/10] params={'hidden_size': 32, 'lstm_layers': 3, 'dropout': 0.3949692657420365, 'attention_head_size': 1, 'learning_rate': 0.00023470619688799343}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 7] val_loss=0.2207
[RS 8/10] params={'hidden_size': 24, 'lstm_layers': 3, 'dropout': 0.15115723710618748, 'attention_head_size': 4, 'learning_rate': 0.00013846838736361294}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 8] val_loss=0.1862
[RS 9/10] params={'hidden_size': 8, 'lstm_layers': 2, 'dropout': 0.3425192044349384, 'attention_head_size': 1, 'learning_rate': 0.00014630213143862117}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 484    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 9] val_loss=0.2617
[RS 10/10] params={'hidden_size': 16, 'lstm_layers': 3, 'dropout': 0.3049790556476375, 'attention_head_size': 1, 'learning_rate': 0.0025162652440348767}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 10] val_loss=0.1386

Best RS loss: 0.08912069350481033
Best RS params: {'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.15503043695984914, 'attention_head_size': 1, 'learning_rate': 0.0018308654580448125}

=== GLOBAL TFT: Bayesian Optimization (Optuna, 20 trials) ===


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 34.9 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 10.5 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | sta


Best BO loss: 0.07907180488109589
Best BO params: {'hidden_size': 56, 'lstm_layers': 2, 'dropout': 0.1350785508017908, 'attention_head_size': 4, 'learning_rate': 0.0008807646711700635}

FINAL GLOBAL PARAMS: {'hidden_size': 56, 'lstm_layers': 2, 'dropout': 0.1350785508017908, 'attention_head_size': 4, 'learning_rate': 0.0008807646711700635}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 778    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 5.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 51.0 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 16.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 12.9 K | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=40` reached.



Best checkpoint path: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_global\tft_global_best.ckpt
Saved global TFT info to: D:\Documents\Skripsi\demand-forecasting\outputs\tft_global\tft_global_info.json


In [5]:
#03_train_cluster_tft.py
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import json

import numpy as np
import pandas as pd

import torch
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

import optuna
from optuna.pruners import MedianPruner


# =====================================================
# PATH & CONFIG
# =====================================================
PROJECT_ROOT = Path(r"D:\Documents\Skripsi\demand-forecasting")

CLUSTER_OUT_DIR = PROJECT_ROOT / "outputs" / "tft_clustered"
PANEL_PATH = CLUSTER_OUT_DIR / "panel_ready_tft.csv"

CHECKPOINT_ROOT = PROJECT_ROOT / "tft_checkpoints_cluster"
CHECKPOINT_ROOT.mkdir(parents=True, exist_ok=True)

seed_everything(42)

# budget tuning per cluster
N_RS = 5          # random search trials per cluster
N_TRIALS_BO = 10  # optuna trials per cluster


# =====================================================
# LOAD PANEL READY
# =====================================================
df = pd.read_csv(PANEL_PATH, parse_dates=["periode"])
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

required_cols = [
    "area", "cabang", "sku", "periode",
    "qty", "qty_log", "is_train",
    "time_idx", "month", "year",
    "cluster_id",
]
for c in required_cols:
    if c not in df.columns:
        raise ValueError(f"Missing required column: {c}")

df["area"] = df["area"].astype(str)
df["cabang"] = df["cabang"].astype(str)
df["sku"] = df["sku"].astype(str)
df["cluster_id"] = df["cluster_id"].astype(str)

if "spike_flag" not in df.columns:
    df["spike_flag"] = 0
if "sample_weight" not in df.columns:
    df["sample_weight"] = 1.0

rolling_cols = [c for c in df.columns if "roll" in c]
lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

print("Total rows panel_ready_tft:", len(df))
print("Unique clusters:", df["cluster_id"].unique())


# =====================================================
# HELPER: BUILD DATASET FOR ONE CLUSTER
# =====================================================
def build_datasets_for_cluster(df_cluster: pd.DataFrame):
    df_cluster = df_cluster.sort_values(["cabang", "sku", "periode"]).copy()

    df_train_c = df_cluster[df_cluster["is_train"] == 1].copy()
    if df_train_c.empty:
        raise ValueError("Train data empty for this cluster.")

    training_cutoff = df_train_c["time_idx"].max() - 6
    print("  training_cutoff:", training_cutoff)

    static_categoricals = ["area", "cabang", "sku", "cluster_id"]

    known_reals = [
        "time_idx",
        "month",
        "year",
        "event_flag",
        "event_flag_lag1",
        "holiday_count",
        "holiday_count_lag1",
        "rainfall_lag1",
        "spike_flag",
    ]
    known_reals = [c for c in known_reals if c in df_cluster.columns]

    unknown_reals = [c for c in rolling_cols if c in df_cluster.columns] + \
                    [c for c in lag_cols if c in df_cluster.columns]

    training_ds = TimeSeriesDataSet(
        df_train_c,
        time_idx="time_idx",
        target="qty_log",
        group_ids=["cabang", "sku"],
        weight="sample_weight",

        min_encoder_length=12,
        max_encoder_length=24,
        min_prediction_length=1,
        max_prediction_length=1,

        static_categoricals=static_categoricals,
        time_varying_known_reals=known_reals,
        time_varying_unknown_reals=unknown_reals,

        target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,

        min_prediction_idx=training_cutoff + 1,
    )

    train_loader = training_ds.to_dataloader(
        train=True,
        batch_size=64,
        num_workers=0,
    )
    val_loader = training_ds.to_dataloader(
        train=False,
        batch_size=64,
        num_workers=0,
    )

    print("  Num samples train:", len(train_loader.dataset))
    print("  Num samples val  :", len(val_loader.dataset))

    return training_ds, train_loader, val_loader


# =====================================================
# HELPER: PARAM SAMPLING & TUNING
# =====================================================
loss_q = QuantileLoss(quantiles=[0.5])


def sample_params():
    return {
        "hidden_size": int(np.random.choice([8, 16, 24, 32])),
        "lstm_layers": int(np.random.choice([1, 2])),
        "dropout": float(np.random.uniform(0.1, 0.4)),
        "attention_head_size": int(np.random.choice([1, 2, 4])),
        "learning_rate": float(np.random.uniform(1e-4, 3e-3)),
    }


def tune_tft_for_cluster(cluster_id, training_ds, train_loader, val_loader, out_dir: Path):
    # ---------------- RANDOM SEARCH ----------------
    rs_results = []
    print(f"\n=== [Cluster {cluster_id}] Random Search ({N_RS} trials) ===")
    for i in range(N_RS):
        params = sample_params()
        print(f"[RS {i+1}/{N_RS}] params={params}")

        model = TemporalFusionTransformer.from_dataset(
            training_ds,
            hidden_size=params["hidden_size"],
            lstm_layers=params["lstm_layers"],
            dropout=params["dropout"],
            attention_head_size=params["attention_head_size"],
            learning_rate=params["learning_rate"],
            loss=loss_q,
            output_size=1,
            log_interval=10,
            log_val_interval=1,
        )

        trainer = Trainer(
            max_epochs=15,
            accelerator="cpu",
            enable_progress_bar=False,
            callbacks=[
                EarlyStopping(monitor="val_loss", patience=3, mode="min"),
            ],
        )
        trainer.fit(model, train_loader, val_loader)

        val_loss = float(trainer.callback_metrics["val_loss"].item())
        rs_results.append((val_loss, params))
        print(f"[RS {i+1}] val_loss={val_loss:.4f}")

    best_rs_loss, best_rs_params = sorted(rs_results, key=lambda x: x[0])[0]
    print(f"[Cluster {cluster_id}] Best RS loss={best_rs_loss:.4f}")
    print(f"[Cluster {cluster_id}] Best RS params={best_rs_params}")

    # ---------------- OPTUNA (BO) ----------------
    def objective(trial: optuna.Trial) -> float:
        hidden_size = trial.suggest_int("hidden_size", 8, 64, step=8)
        lstm_layers = trial.suggest_int("lstm_layers", 1, 3)
        dropout = trial.suggest_float("dropout", 0.1, 0.5)
        attention_head_size = trial.suggest_categorical("attention_head_size", [1, 2, 4])
        learning_rate = trial.suggest_float("learning_rate", 1e-4, 3e-3, log=True)

        params = {
            "hidden_size": int(hidden_size),
            "lstm_layers": int(lstm_layers),
            "dropout": float(dropout),
            "attention_head_size": int(attention_head_size),
            "learning_rate": float(learning_rate),
        }

        model = TemporalFusionTransformer.from_dataset(
            training_ds,
            hidden_size=params["hidden_size"],
            lstm_layers=params["lstm_layers"],
            dropout=params["dropout"],
            attention_head_size=params["attention_head_size"],
            learning_rate=params["learning_rate"],
            loss=loss_q,
            output_size=1,
            log_interval=10,
            log_val_interval=1,
        )

        trainer = Trainer(
            max_epochs=20,
            accelerator="cpu",
            enable_progress_bar=False,
            callbacks=[
                EarlyStopping(monitor="val_loss", patience=4, mode="min"),
            ],
        )
        trainer.fit(model, train_loader, val_loader)
        val_loss = float(trainer.callback_metrics["val_loss"].item())
        return val_loss

    print(f"\n=== [Cluster {cluster_id}] Bayesian Optimization ({N_TRIALS_BO} trials) ===")
    study = optuna.create_study(
        direction="minimize",
        pruner=MedianPruner(n_warmup_steps=3),
    )
    study.optimize(objective, n_trials=N_TRIALS_BO, show_progress_bar=False)

    best_bo_params = study.best_trial.params
    best_bo_loss = study.best_value

    print(f"[Cluster {cluster_id}] Best BO loss={best_bo_loss:.4f}")
    print(f"[Cluster {cluster_id}] Best BO params={best_bo_params}")

    final_params = {
        "hidden_size": int(best_bo_params["hidden_size"]),
        "lstm_layers": int(best_bo_params["lstm_layers"]),
        "dropout": float(best_bo_params["dropout"]),
        "attention_head_size": int(best_bo_params["attention_head_size"]),
        "learning_rate": float(best_bo_params["learning_rate"]),
    }

    # ---------------- TRAIN FINAL MODEL ----------------
    logger = CSVLogger(save_dir=str(out_dir), name=f"tft_cluster_{cluster_id}")

    checkpoint_cb = ModelCheckpoint(
        dirpath=out_dir,
        filename=f"tft_cluster_{cluster_id}_best",
        monitor="val_loss",
        mode="min",
        save_top_k=1,
    )
    early_cb = EarlyStopping(monitor="val_loss", patience=8, mode="min")

    final_model = TemporalFusionTransformer.from_dataset(
        training_ds,
        hidden_size=final_params["hidden_size"],
        lstm_layers=final_params["lstm_layers"],
        dropout=final_params["dropout"],
        attention_head_size=final_params["attention_head_size"],
        learning_rate=final_params["learning_rate"],
        loss=loss_q,
        output_size=1,
        log_interval=10,
        log_val_interval=1,
    )

    trainer = Trainer(
        max_epochs=40,
        accelerator="cpu",
        logger=logger,
        callbacks=[checkpoint_cb, early_cb],
        enable_progress_bar=True,
    )
    trainer.fit(final_model, train_loader, val_loader)

    print(f"[Cluster {cluster_id}] Best checkpoint:", checkpoint_cb.best_model_path)

    return final_model, final_params, best_rs_loss, best_rs_params, best_bo_loss, best_bo_params, checkpoint_cb.best_model_path


# =====================================================
# TRAIN TFT PER CLUSTER
# =====================================================
all_clusters = sorted(df["cluster_id"].unique())
print("\nClusters to train:", all_clusters)

cluster_models_info = []

for cid in all_clusters:
    print("\n====================================")
    print(f"   TRAINING TFT FOR CLUSTER {cid}")
    print("====================================")

    df_c = df[df["cluster_id"] == cid].copy()
    if df_c["is_train"].sum() == 0:
        print(f"Cluster {cid}: no train data, skip.")
        continue

    cluster_out_dir = CHECKPOINT_ROOT / f"cluster_{cid}"
    cluster_out_dir.mkdir(parents=True, exist_ok=True)

    training_ds_c, train_loader_c, val_loader_c = build_datasets_for_cluster(df_c)

    (
        model_c,
        final_params_c,
        best_rs_loss_c,
        best_rs_params_c,
        best_bo_loss_c,
        best_bo_params_c,
        best_ckpt_c,
    ) = tune_tft_for_cluster(
        cid,
        training_ds_c,
        train_loader_c,
        val_loader_c,
        cluster_out_dir,
    )

    cluster_models_info.append({
        "cluster_id": cid,
        "final_params": final_params_c,
        "best_rs_loss": best_rs_loss_c,
        "best_rs_params": best_rs_params_c,
        "best_bo_loss": best_bo_loss_c,
        "best_bo_params": best_bo_params_c,
        "best_checkpoint": best_ckpt_c,
    })

# simpan info semua cluster
info_path = CLUSTER_OUT_DIR / "tft_cluster_models_info.json"
with open(info_path, "w") as f:
    json.dump(cluster_models_info, f, indent=2)

print("\nSaved cluster models info to:", info_path)


Seed set to 42


Total rows panel_ready_tft: 4965
Unique clusters: ['1' '0']

Clusters to train: ['0', '1']

   TRAINING TFT FOR CLUSTER 0
  training_cutoff: 34
  Num samples train: 432
  Num samples val  : 432

=== [Cluster 0] Random Search (5 trials) ===
[RS 1/5] params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.38521429192297485, 'attention_head_size': 4, 'learning_rate': 0.002361103900791031}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 115    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.9 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 1] val_loss=0.1891
[RS 2/5] params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.23374982585607737, 'attention_head_size': 4, 'learning_rate': 0.00026844247528777846}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 115    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.5 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 2] val_loss=0.3733
[RS 3/5] params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.2803345035229627, 'attention_head_size': 4, 'learning_rate': 0.00015969503345782712}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 115    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 34.9 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 10.5 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | sta

[RS 3] val_loss=0.2830
[RS 4/5] params={'hidden_size': 16, 'lstm_layers': 2, 'dropout': 0.34973279224012654, 'attention_head_size': 2, 'learning_rate': 0.00010225842093894156}


TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 115    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 2.3 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 20.0 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 6.6 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | 

[RS 4] val_loss=0.3958
[RS 5/5] params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.285244452888315, 'attention_head_size': 2, 'learning_rate': 0.00162179365173349}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 115    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 34.9 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 10.5 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | sta

[RS 5] val_loss=0.2130
[Cluster 0] Best RS loss=0.1891
[Cluster 0] Best RS params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.38521429192297485, 'attention_head_size': 4, 'learning_rate': 0.002361103900791031}

=== [Cluster 0] Bayesian Optimization (10 trials) ===


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 115    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 6.1 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 56.4 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 18.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K | train
8  | sta

[Cluster 0] Best BO loss=0.1263
[Cluster 0] Best BO params={'hidden_size': 40, 'lstm_layers': 1, 'dropout': 0.17062739538309685, 'attention_head_size': 1, 'learning_rate': 0.0018532734026364811}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 115    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 4.2 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 40.2 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 12.4 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 6.6 K  | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=40` reached.


[Cluster 0] Best checkpoint: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_cluster\cluster_0\tft_cluster_0_best.ckpt

   TRAINING TFT FOR CLUSTER 1
  training_cutoff: 34


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


  Num samples train: 1728
  Num samples val  : 1728

=== [Cluster 1] Random Search (5 trials) ===
[RS 1/5] params={'hidden_size': 32, 'lstm_layers': 1, 'dropout': 0.1873687420594126, 'attention_head_size': 4, 'learning_rate': 0.001259596817974241}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 676    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 34.9 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 10.5 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K  | train
8  | sta

[RS 1] val_loss=0.0972
[RS 2/5] params={'hidden_size': 32, 'lstm_layers': 2, 'dropout': 0.39212665565243776, 'attention_head_size': 4, 'learning_rate': 0.0014226029542294043}


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 676    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[RS 2] val_loss=0.1139
[RS 3/5] params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 676    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 28.1 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 8.6 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

[RS 3] val_loss=0.0895
[RS 4/5] params={'hidden_size': 8, 'lstm_layers': 1, 'dropout': 0.3579821220208962, 'attention_head_size': 4, 'learning_rate': 0.0005945199586931455}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 451    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 11.5 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 4.2 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 304    | train
8  | sta

[RS 4] val_loss=0.1913
[RS 5/5] params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.384665661176, 'attention_head_size': 2, 'learning_rate': 0.0024443523095377374}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 676    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 28.1 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 8.6 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 2.4 K  | train
8  | sta

[RS 5] val_loss=0.1196
[Cluster 1] Best RS loss=0.0895
[Cluster 1] Best RS params={'hidden_size': 24, 'lstm_layers': 2, 'dropout': 0.15990213464750794, 'attention_head_size': 4, 'learning_rate': 0.002951369568839686}

=== [Cluster 1] Bayesian Optimization (10 trials) ===


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 676    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 3.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork       

[Cluster 1] Best BO loss=0.0810
[Cluster 1] Best BO params={'hidden_size': 8, 'lstm_layers': 2, 'dropout': 0.1474613732473071, 'attention_head_size': 2, 'learning_rate': 0.002716077137542638}


GPU available: False, used: False
TPU available: False, using: 0 TPU cores

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 451    | train
3  | prescalers                         | ModuleDict                      | 496    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 11.5 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 4.2 K  | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 304    | train
8  | sta

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=40` reached.


[Cluster 1] Best checkpoint: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_cluster\cluster_1\tft_cluster_1_best.ckpt

Saved cluster models info to: D:\Documents\Skripsi\demand-forecasting\outputs\tft_clustered\tft_cluster_models_info.json


In [17]:
# 04_predict_all.py
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import json

import numpy as np
import pandas as pd

import torch
from lightning.pytorch import seed_everything
from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer
from pytorch_forecasting.models import TemporalFusionTransformer

seed_everything(42)


# =====================================================
# PATH CONFIG
# =====================================================
PROJECT_ROOT = Path(r"D:\Documents\Skripsi\demand-forecasting")

OUT_DIR = PROJECT_ROOT / "outputs" / "tft_clustered"
PANEL_PATH = OUT_DIR / "panel_ready_tft.csv"

GLOBAL_INFO = PROJECT_ROOT / "outputs" / "tft_global" / "tft_global_info.json"
GLOBAL_CKPT = PROJECT_ROOT / "tft_checkpoints_global" / "tft_global_best.ckpt"

CLUSTER_INFO = OUT_DIR / "tft_cluster_models_info.json"   # sudah sesuai JSON yang kamu kirim
CLUSTER_CKPT_ROOT = PROJECT_ROOT / "tft_checkpoints_cluster"


# =====================================================
# LOAD PANEL
# =====================================================
df = pd.read_csv(PANEL_PATH, parse_dates=["periode"])
df = df.sort_values(["cabang", "sku", "periode"]).reset_index(drop=True)

if "area" in df.columns:
    df["area"] = df["area"].astype(str)
else:
    df["area"] = "NA"

df["cabang"] = df["cabang"].astype(str)
df["sku"] = df["sku"].astype(str)

if "cluster_id" in df.columns:
    df["cluster_id"] = df["cluster_id"].astype(str)
else:
    raise ValueError("Kolom 'cluster_id' tidak ditemukan di panel_ready_tft.csv")

rolling_cols = [c for c in df.columns if "roll" in c]
lag_cols = [c for c in df.columns if c.startswith("qty_lag")]

print("Panel rows:", len(df))
print("Contoh kolom:", df.columns.tolist())


# =====================================================
# LOAD GLOBAL MODEL
# =====================================================
print("\n=== LOADING GLOBAL MODEL ===")
global_info = json.load(open(GLOBAL_INFO, "r"))
global_model = TemporalFusionTransformer.load_from_checkpoint(str(GLOBAL_CKPT))
print("Loaded global model checkpoint:", GLOBAL_CKPT)


# =====================================================
# LOAD CLUSTER MODELS
# =====================================================
print("\n=== LOADING CLUSTER MODELS ===")
cluster_info = json.load(open(CLUSTER_INFO, "r"))

cluster_models = {}
for item in cluster_info:
    cid = str(item["cluster_id"])
    # JSON kamu pakai key 'best_checkpoint', bukan 'checkpoint'
    ckpt_path = item["best_checkpoint"]
    print(f"  Cluster {cid}: load {ckpt_path}")
    model_c = TemporalFusionTransformer.load_from_checkpoint(ckpt_path)
    cluster_models[cid] = model_c


# =====================================================
# HELPER: BUILD PREDICT DATASET (TANPA predict=True)
# =====================================================
def build_predict_dataset(df_input, training_ds_template):
    ds = TimeSeriesDataSet.from_dataset(
        training_ds_template,
        df_input,
        stop_randomization=True,
    )
    return ds


# =====================================================
# HELPER: inverse log1p
# =====================================================
def inv_log(x):
    return np.expm1(x)


# =====================================================
# 1) GLOBAL MODEL PREDICTION
# =====================================================
print("\n=== PREDICT USING GLOBAL MODEL ===")

df_train = df[df["is_train"] == 1].copy()

training_ds_global = TimeSeriesDataSet(
    df_train,
    time_idx="time_idx",
    target="qty_log",
    group_ids=["cabang", "sku"],
    weight="sample_weight",

    min_encoder_length=12,
    max_encoder_length=24,
    min_prediction_length=1,
    max_prediction_length=1,

    static_categoricals=["area", "cabang", "sku", "cluster_id"],
    time_varying_known_reals=[
        c for c in [
            "time_idx", "month", "year",
            "event_flag", "event_flag_lag1",
            "holiday_count", "holiday_count_lag1",
            "rainfall_lag1", "spike_flag"
        ]
        if c in df.columns
    ],
    time_varying_unknown_reals=rolling_cols + lag_cols,

    target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,

    min_prediction_idx=df_train["time_idx"].max() - 6 + 1,
)

predict_ds_global = build_predict_dataset(df, training_ds_global)

raw_pred_global = global_model.predict(predict_ds_global, return_index=True)
pred_log_global = raw_pred_global.output.squeeze(-1)
pred_idx = raw_pred_global.index

df_pred_global = pd.DataFrame({
    "cabang": pred_idx["cabang"],
    "sku": pred_idx["sku"],
    "time_idx": pred_idx["time_idx"],
    "qty_pred_log": pred_log_global,
})
df_pred_global["qty_pred"] = inv_log(df_pred_global["qty_pred_log"])

df_pred_global = df_pred_global.merge(
    df[["cabang", "sku", "time_idx", "qty", "periode",
        "is_train", "is_test", "area", "cluster_id"]],
    on=["cabang", "sku", "time_idx"],
    how="left"
)

df_pred_global = (
    df_pred_global
    .groupby(["cabang", "sku", "time_idx", "periode"], as_index=False)
    .agg(
        qty_pred=("qty_pred", "mean"),
        qty_pred_log=("qty_pred_log", "mean"),
        qty=("qty", "first"),
        is_train=("is_train", "max"),
        is_test=("is_test", "max"),
        area=("area", "first"),
        cluster_id=("cluster_id", "first"),
    )
)

global_pred_path = OUT_DIR / "tft_global_predictions.csv"
df_pred_global.to_csv(global_pred_path, index=False)
print("Saved global predictions to:", global_pred_path)


# =====================================================
# 2) CLUSTER MODEL PREDICTION
# =====================================================
print("\n=== PREDICT USING CLUSTER MODELS ===")

pred_cluster_rows = []

for cid, model_c in cluster_models.items():
    print(f"\n--- Predict for cluster {cid} ---")
    df_c = df[df["cluster_id"] == cid].copy()
    if df_c.empty:
        print(f"Cluster {cid}: panel kosong, skip.")
        continue

    df_train_c = df_c[df_c["is_train"] == 1].copy()
    if df_train_c.empty:
        print(f"Cluster {cid}: tidak ada data train, skip.")
        continue

    training_cutoff = df_train_c["time_idx"].max() - 6

    training_ds_c = TimeSeriesDataSet(
        df_train_c,
        time_idx="time_idx",
        target="qty_log",
        group_ids=["cabang", "sku"],
        weight="sample_weight",

        min_encoder_length=12,
        max_encoder_length=24,
        min_prediction_length=1,
        max_prediction_length=1,

        static_categoricals=["area", "cabang", "sku", "cluster_id"],
        time_varying_known_reals=[
            c for c in [
                "time_idx", "month", "year",
                "event_flag", "event_flag_lag1",
                "holiday_count", "holiday_count_lag1",
                "rainfall_lag1", "spike_flag"
            ]
            if c in df_c.columns
        ],
        time_varying_unknown_reals=rolling_cols + lag_cols,

        target_normalizer=GroupNormalizer(groups=["cabang", "sku"]),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,

        min_prediction_idx=training_cutoff + 1,
    )

    predict_ds_c = build_predict_dataset(df_c, training_ds_c)

    raw_pred_c = model_c.predict(predict_ds_c, return_index=True)
    pred_log_c = raw_pred_c.output.squeeze(-1)
    pred_idx_c = raw_pred_c.index

    df_pred_c = pd.DataFrame({
        "cabang": pred_idx_c["cabang"],
        "sku": pred_idx_c["sku"],
        "time_idx": pred_idx_c["time_idx"],
        "qty_pred_log": pred_log_c,
    })
    df_pred_c["qty_pred"] = inv_log(df_pred_c["qty_pred_log"])

    df_pred_c = df_pred_c.merge(
        df_c[["cabang", "sku", "time_idx", "periode",
              "qty", "is_train", "is_test", "area", "cluster_id"]],
        on=["cabang", "sku", "time_idx"],
        how="left"
    )

    df_pred_c = (
        df_pred_c
        .groupby(["cabang", "sku", "time_idx", "periode"], as_index=False)
        .agg(
            qty_pred=("qty_pred", "mean"),
            qty_pred_log=("qty_pred_log", "mean"),
            qty=("qty", "first"),
            is_train=("is_train", "max"),
            is_test=("is_test", "max"),
            area=("area", "first"),
            cluster_id=("cluster_id", "first"),
        )
    )

    pred_cluster_rows.append(df_pred_c)

if pred_cluster_rows:
    df_all_clusters = pd.concat(pred_cluster_rows, ignore_index=True)
else:
    df_all_clusters = pd.DataFrame(columns=[
        "cabang", "sku", "time_idx", "periode",
        "qty_pred", "qty_pred_log", "qty",
        "is_train", "is_test", "area", "cluster_id"
    ])

cluster_pred_path = OUT_DIR / "tft_cluster_predictions.csv"
df_all_clusters.to_csv(cluster_pred_path, index=False)

print("Saved cluster predictions to:", cluster_pred_path)
print("\nPrediction pipeline complete.")


Seed set to 42


Panel rows: 4965
Contoh kolom: ['area', 'cabang', 'sku', 'periode', 'qty', 'event_flag', 'event_flag_lag1', 'holiday_count', 'holiday_count_lag1', 'rainfall_lag1', 'is_train', 'is_test', 'imputed', 'spike_flag', 'sample_weight', 'month', 'year', 'qtr', 'qty_lag1', 'qty_lag2', 'qty_lag3', 'qty_lag4', 'qty_lag5', 'qty_lag6', 'qty_lag7', 'qty_lag8', 'qty_lag9', 'qty_lag10', 'qty_lag11', 'qty_lag12', 'qty_rollmean_3', 'qty_rollstd_3', 'qty_rollmean_6', 'qty_rollstd_6', 'qty_rollmean_12', 'qty_rollstd_12', 'cluster_id', 'qty_log', 'time_idx']

=== LOADING GLOBAL MODEL ===
Loaded global model checkpoint: D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_global\tft_global_best.ckpt

=== LOADING CLUSTER MODELS ===
  Cluster 0: load D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_cluster\cluster_0\tft_cluster_0_best.ckpt
  Cluster 1: load D:\Documents\Skripsi\demand-forecasting\tft_checkpoints_cluster\cluster_1\tft_cluster_1_best.ckpt


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.



=== PREDICT USING GLOBAL MODEL ===


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Saved global predictions to: D:\Documents\Skripsi\demand-forecasting\outputs\tft_clustered\tft_global_predictions.csv

=== PREDICT USING CLUSTER MODELS ===

--- Predict for cluster 0 ---


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.



--- Predict for cluster 1 ---


GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Saved cluster predictions to: D:\Documents\Skripsi\demand-forecasting\outputs\tft_clustered\tft_cluster_predictions.csv

Prediction pipeline complete.


In [18]:
# 05_evaluate_and_plot.py
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error, mean_absolute_error
from datetime import datetime

EVAL_START = pd.Timestamp("2024-06-01")
EVAL_END   = pd.Timestamp("2024-10-01")


# =====================================================
# PATH CONFIG
# =====================================================
PROJECT_ROOT = Path(r"D:\Documents\Skripsi\demand-forecasting")

OUT_DIR = PROJECT_ROOT / "outputs" / "tft_clustered"

GLOBAL_PRED_PATH = OUT_DIR / "tft_global_predictions.csv"
CLUSTER_PRED_PATH = OUT_DIR / "tft_cluster_predictions.csv"

EVAL_OUT = OUT_DIR / "evaluation_outputs"
EVAL_OUT.mkdir(parents=True, exist_ok=True)


# =====================================================
# LOAD PREDICTIONS
# =====================================================
df_g = pd.read_csv(GLOBAL_PRED_PATH, parse_dates=["periode"])
df_c = pd.read_csv(CLUSTER_PRED_PATH, parse_dates=["periode"])

print("Global pred rows:", len(df_g))
print("Cluster pred rows:", len(df_c))


# =====================================================
# METRIC CALCULATOR
# =====================================================
def calc_metrics(df):
    df = df.dropna(subset=["qty", "qty_pred"])
    if df.empty:
        return None

    y_true = df["qty"]
    y_pred = df["qty_pred"]

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)

    # avoid division by zero
    nonzero = y_true != 0
    if nonzero.sum() > 0:
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = np.nan

    smape = np.mean(
        2 * np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-9)
    ) * 100

    return {
        "RMSE": rmse,
        "MAE": mae,
        "MAPE%": mape,
        "sMAPE%": smape,
        "MSE": mse
    }


# =====================================================
# EVALUATE GLOBAL MODEL
# =====================================================
print("\n=== GLOBAL MODEL EVALUATION ===")

metrics_global_train = calc_metrics(df_g[df_g["is_train"] == 1])
metrics_global_test = calc_metrics(df_g[df_g["is_test"] == 1])

print("Train:", metrics_global_train)
print("Test:", metrics_global_test)


# =====================================================
# EVALUATE CLUSTER MODEL
# =====================================================
print("\n=== CLUSTER MODEL EVALUATION ===")

metrics_cluster_train = calc_metrics(df_c[df_c["is_train"] == 1])
metrics_cluster_test = calc_metrics(df_c[df_c["is_test"] == 1])

print("Train:", metrics_cluster_train)
print("Test:", metrics_cluster_test)


# =====================================================
# SAVE SUMMARY CSV
# =====================================================
summary_df = pd.DataFrame([
    {
        "Model": "Global TFT",
        "RMSE_train": metrics_global_train["RMSE"],
        "MAE_train": metrics_global_train["MAE"],
        "MAPE%_train": metrics_global_train["MAPE%"],
        "sMAPE%_train": metrics_global_train["sMAPE%"],
        "RMSE_test": metrics_global_test["RMSE"],
        "MAE_test": metrics_global_test["MAE"],
        "MAPE%_test": metrics_global_test["MAPE%"],
        "sMAPE%_test": metrics_global_test["sMAPE%"],
    },
    {
        "Model": "Cluster TFT",
        "RMSE_train": metrics_cluster_train["RMSE"],
        "MAE_train": metrics_cluster_train["MAE"],
        "MAPE%_train": metrics_cluster_train["MAPE%"],
        "sMAPE%_train": metrics_cluster_train["sMAPE%"],
        "RMSE_test": metrics_cluster_test["RMSE"],
        "MAE_test": metrics_cluster_test["MAE"],
        "MAPE%_test": metrics_cluster_test["MAPE%"],
        "sMAPE%_test": metrics_cluster_test["sMAPE%"],
    }
])

summary_path = EVAL_OUT / "tft_model_comparison_summary.csv"
summary_df.to_csv(summary_path, index=False)
print("\nSaved summary:", summary_path)


# =====================================================
# PLOT PRED VS ACTUAL (ONLY SKU TEST, 5 BULAN)
# =====================================================
print("\n=== PLOTTING PRED vs ACTUAL (TEST SKUs) ===")

mask_test_period_g = (
    (df_g["periode"] >= EVAL_START) &
    (df_g["periode"] <= EVAL_END) &
    (df_g["is_test"] == 1)
)
mask_test_period_c = (
    (df_c["periode"] >= EVAL_START) &
    (df_c["periode"] <= EVAL_END) &
    (df_c["is_test"] == 1)
)

df_test_global = df_g[mask_test_period_g].copy()
df_test_cluster = df_c[mask_test_period_c].copy()

print("Rows df_test_global:", len(df_test_global))
print("Rows df_test_cluster:", len(df_test_cluster))

test_skus = (
    df_test_global
    .groupby(["cabang", "sku"], as_index=False)
    .size()[["cabang", "sku"]]
    .itertuples(index=False, name=None)
)

for cab, sku in test_skus:
    fig, ax = plt.subplots(figsize=(10, 4))

    g_sku = df_test_global[(df_test_global["cabang"] == cab) & (df_test_global["sku"] == sku)]
    c_sku = df_test_cluster[(df_test_cluster["cabang"] == cab) & (df_test_cluster["sku"] == sku)]

    plt.plot(g_sku["periode"], g_sku["qty"], label="Actual", marker="o")
    plt.plot(g_sku["periode"], g_sku["qty_pred"], label="Global TFT", marker="x")
    plt.plot(c_sku["periode"], c_sku["qty_pred"], label="Cluster TFT", marker="s")

    plt.title(f"Pred vs Actual â€“ {cab} | {sku}")
    plt.xlabel("Periode")
    plt.ylabel("Qty")
    plt.legend()
    plt.grid(True)

    save_path = EVAL_OUT / f"plot_pred_actual_{cab}_{sku}.png"
    plt.savefig(save_path)
    plt.close()

    print("Saved:", save_path)


# =====================================================
# ERROR DISTRIBUTION PLOT (TEST)
# =====================================================
print("\n=== ERROR DISTRIBUTION ===")

df_test_global["error"] = df_test_global["qty_pred"] - df_test_global["qty"]
df_test_cluster["error"] = df_test_cluster["qty_pred"] - df_test_cluster["qty"]

plt.figure(figsize=(8, 5))
sns.kdeplot(df_test_global["error"], label="Global", shade=True)
sns.kdeplot(df_test_cluster["error"], label="Cluster", shade=True)
plt.title("Error Distribution (Test SKUs)")
plt.xlabel("Error")
plt.legend()
plt.grid(True)

err_plot_path = EVAL_OUT / "error_distribution.png"
plt.savefig(err_plot_path)
plt.close()

print("Saved:", err_plot_path)


# =====================================================
# ANALISIS OVERFIT
# =====================================================
def overfit_status(train_rmse, test_rmse):
    if test_rmse < train_rmse * 1.1:
        return "No Overfit"
    elif test_rmse < train_rmse * 1.3:
        return "Mild Overfit"
    else:
        return "Strong Overfit"


analysis_text = f"""
GLOBAL TFT:
  Train RMSE = {metrics_global_train['RMSE']:.4f}
  Test RMSE  = {metrics_global_test['RMSE']:.4f}
  Status     = {overfit_status(metrics_global_train['RMSE'], metrics_global_test['RMSE'])}

CLUSTER TFT:
  Train RMSE = {metrics_cluster_train['RMSE']:.4f}
  Test RMSE  = {metrics_cluster_test['RMSE']:.4f}
  Status     = {overfit_status(metrics_cluster_train['RMSE'], metrics_cluster_test['RMSE'])}
"""

with open(EVAL_OUT / "overfit_analysis.txt", "w") as f:
    f.write(analysis_text)

print("\nSaved overfit analysis.")


# =====================================================
# METRICS PER CABANG, SKU (TEST SET)
# =====================================================
print("\n=== METRICS PER CABANG, SKU (TEST) ===")

rows = []

pairs = (
    df_test_global
    .groupby(["cabang", "sku"], as_index=False)
    .size()[["cabang", "sku"]]
    .itertuples(index=False, name=None)
)

for cab, sku in pairs:
    g_sku = df_test_global[
        (df_test_global["cabang"] == cab) &
        (df_test_global["sku"] == sku)
    ].copy()

    c_sku = df_test_cluster[
        (df_test_cluster["cabang"] == cab) &
        (df_test_cluster["sku"] == sku)
    ].copy()

    m_g = calc_metrics(g_sku) if not g_sku.empty else None
    m_c = calc_metrics(c_sku) if not c_sku.empty else None

    rows.append({
        "cabang": cab,
        "sku": sku,
        "n_test_global": len(g_sku),
        "n_test_cluster": len(c_sku),

        "RMSE_test_global":   m_g["RMSE"]    if m_g else np.nan,
        "MAE_test_global":    m_g["MAE"]     if m_g else np.nan,
        "MAPE%_test_global":  m_g["MAPE%"]   if m_g else np.nan,
        "sMAPE%_test_global": m_g["sMAPE%"]  if m_g else np.nan,

        "RMSE_test_cluster":   m_c["RMSE"]    if m_c else np.nan,
        "MAE_test_cluster":    m_c["MAE"]     if m_c else np.nan,
        "MAPE%_test_cluster":  m_c["MAPE%"]   if m_c else np.nan,
        "sMAPE%_test_cluster": m_c["sMAPE%"]  if m_c else np.nan,
    })

metrics_by_series = pd.DataFrame(rows)

per_series_path = EVAL_OUT / "tft_metrics_by_cabang_sku_test.csv"
metrics_by_series.to_csv(per_series_path, index=False)

print("Saved per-series metrics to:", per_series_path)


Global pred rows: 765
Cluster pred rows: 765

=== GLOBAL MODEL EVALUATION ===
Train: {'RMSE': 393.000397421529, 'MAE': 194.89032322222224, 'MAPE%': 14.843127689246277, 'sMAPE%': 12.934608051845562, 'MSE': 154449.3123734797}
Test: {'RMSE': 3096.2064072425774, 'MAE': 2013.0413948888888, 'MAPE%': 82.80386176662404, 'sMAPE%': 50.9636129084963, 'MSE': 9586494.11624999}

=== CLUSTER MODEL EVALUATION ===
Train: {'RMSE': 436.91772823990857, 'MAE': 224.26716758333333, 'MAPE%': 17.335472010507946, 'sMAPE%': 15.759628581975038, 'MSE': 190897.10125032262}
Test: {'RMSE': 2955.7340890080127, 'MAE': 1941.4880057777782, 'MAPE%': 74.085400807984, 'sMAPE%': 48.71024570047146, 'MSE': 8736364.004924025}

Saved summary: D:\Documents\Skripsi\demand-forecasting\outputs\tft_clustered\evaluation_outputs\tft_model_comparison_summary.csv

=== PLOTTING PRED vs ACTUAL (TEST SKUs) ===
Rows df_test_global: 45
Rows df_test_cluster: 45
Saved: D:\Documents\Skripsi\demand-forecasting\outputs\tft_clustered\evaluation_out

In [13]:
panel = pd.read_csv(
    r"D:\Documents\Skripsi\demand-forecasting\outputs\tft_clustered\panel_ready_tft.csv",
    parse_dates=["periode"]
)

panel_02A = panel[(panel["cabang"]=="02A") & (panel["sku"]=="BUVW001KSW")][
    ["periode","qty","is_train","is_test"]
].sort_values("periode")
print(panel_02A.tail(15))


       periode     qty  is_train  is_test
236 2023-08-01  1910.0         1        0
237 2023-09-01  2913.0         1        0
238 2023-10-01  5146.0         1        0
239 2023-11-01  1798.0         1        0
240 2023-12-01  4504.0         1        0
241 2024-01-01  3220.0         1        0
242 2024-02-01  1833.0         1        0
243 2024-03-01  6807.0         1        0
244 2024-04-01  1551.0         1        0
245 2024-05-01  5553.0         1        0
246 2024-06-01  2306.0         0        1
247 2024-07-01  2395.0         0        1
248 2024-08-01  1515.0         0        1
249 2024-09-01  5702.0         0        1
250 2024-10-01  1298.0         0        1
