<a href="https://colab.research.google.com/github/karasu1982/colab_notebook/blob/main/202510_AttnBoost_Retail_Supply_Chain_Sales_Insights_via_Gradient_Boosting_Perspective.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import gc
import math
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

In [None]:
# ========================================
# Data acquisition (UCI Online Retail)
# ========================================
UCI_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx"
df = pd.read_excel(UCI_URL)

# Basic cleaning: drop invoices whose number starts with "C" (presumably
# cancellations — confirm against the dataset description) and rows with a
# non-positive quantity or unit price.
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of the raw frame.
is_c_invoice = df['InvoiceNo'].astype(str).str.startswith('C')
df = df[~is_c_invoice & (df['Quantity']>0) & (df['UnitPrice']>0)].copy()
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
df['Sales'] = df['Quantity']*df['UnitPrice']
df['date'] = df['InvoiceDate'].dt.date

# index=False: the row index is not data; writing it would reappear as an
# "Unnamed: 0" column when the CSV is read back.
df[["date","Country", "StockCode", "Quantity", "UnitPrice"]].to_csv("retail_sales.csv", index=False)

In [None]:
# Preview the first rows of the cleaned transaction frame.
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Sales,date
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,15.3,2010-12-01
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,2010-12-01
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,22.0,2010-12-01
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,2010-12-01
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34,2010-12-01


In [None]:
# ===== Gradient-boosting backend (prefer LightGBM, then XGBoost, then sklearn HGB) =====
# GBDT_BACKEND records which library imported successfully; fit_gbdt and
# one_step_forecast branch on this string.
GBDT_BACKEND = None
try:
    import lightgbm as lgb
    GBDT_BACKEND = "lightgbm"
# Broad catch: any failure while importing LightGBM (not only ImportError)
# falls through to the next backend.
except Exception:
    try:
        import xgboost as xgb
        GBDT_BACKEND = "xgboost"
    except Exception:
        # Last resort: scikit-learn's histogram gradient boosting.
        from sklearn.ensemble import HistGradientBoostingRegressor
        GBDT_BACKEND = "sklearn"

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GroupKFold

# ===== Attention Encoder (PyTorch) =====
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Fixed seed for NumPy's global RNG (used by make_synthetic) and for PyTorch.
SEED = 42
np.random.seed(SEED)
# As the last expression of the cell, this call's return value (a
# torch Generator) is echoed as the cell output.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7b156d14dad0>

In [None]:
# ----------------------------
# User settings
# ----------------------------
# DATA_PATH = "retail_sales.csv" # if None, synthetic data is generated
DATA_PATH = None
DATE_COL = "date"
STORE_COL = "Country"
SKU_COL = "StockCode"
TARGET_COL = "Quantity"
PRICE_COL = "UnitPrice"
PROMO_COL = "on_promo"  # 0/1 (True/False also accepted)
HAS_HEADER_MAPPING = False  # set to True and edit MAPPING when the CSV column names differ from the ones above

# Example column-name mapping (adjust to your CSV).
# The data-loading cell uses this dict to rename CSV columns when
# HAS_HEADER_MAPPING is True.
MAPPING = {
    "date": "date",
    "store_id": "Country",
    "sku_id": "StockCode",
    "sales": "Quantity",
    "price": "UnitPrice",
    # "on_promo": "on_promo",
    # add further columns such as stock or weather here: "stock": "stock", "temp": "temperature", ...
}

FREQ = "W"  # weekly data assumed (use "D" for daily)
LAG_WEEKS = [1, 2, 3, 4, 6, 8, 12]
ROLL_WINS = [2, 4, 8, 12]
ATTN_WINDOW = 8  # attention window length (the most recent 8 periods)
FORECAST_HORIZON_WEEKS = 4  # forecast horizon
BATCH_SIZE = 256
AT_EPOCHS = 8
LR = 1e-3
NUM_WORKERS = 0

In [None]:
# ----------------------------
# Synthetic data generation (used when no CSV is available)
# ----------------------------
def make_synthetic(n_store=20, n_sku=100, weeks=120):
    """Generate a synthetic sales panel with one series per (store, sku).

    Each series is the sum of a random base level, a linear trend, a
    sinusoidal seasonal term, Gaussian noise, a promotion uplift and a small
    price effect, floored at zero and rounded to integers. Draws come from
    NumPy's global RNG, so the output depends on the current seed state.

    Parameters
    ----------
    n_store : int
        Number of stores (ids 0..n_store-1).
    n_sku : int
        Number of SKUs per store (ids 0..n_sku-1).
    weeks : int
        Number of periods per series, spaced at FREQ from "2022-01-02".

    Returns
    -------
    pandas.DataFrame
        Columns DATE_COL, STORE_COL, SKU_COL, TARGET_COL, PRICE_COL and
        PROMO_COL, with n_store * n_sku * weeks rows.
    """
    period_index = pd.date_range("2022-01-02", periods=weeks, freq=FREQ)
    # Simple seasonality; identical for every series, so computed once.
    seasonal = 10*np.sin(np.arange(weeks)/6.0)

    def _simulate_series(store_id, sku_id):
        # The order of the random draws below is significant for reproducibility.
        level = np.random.uniform(5, 50)
        drift = np.linspace(0, np.random.uniform(-1, 3), weeks)
        shock = np.random.normal(0, 5, weeks)
        promo_flag = (np.random.rand(weeks) < 0.15).astype(int)
        unit_price = np.clip(np.random.normal(100, 15, weeks) - promo_flag*5, 60, 160)
        demand = np.maximum(0, level + drift + seasonal + shock + (promo_flag*8) - 0.05*(unit_price-100))
        # Crude conversion to integer unit counts.
        demand = np.round(demand).astype(int)
        return pd.DataFrame({
            DATE_COL: period_index,
            STORE_COL: store_id,
            SKU_COL: sku_id,
            TARGET_COL: demand,
            PRICE_COL: unit_price,
            PROMO_COL: promo_flag
        })

    frames = [
        _simulate_series(store_id, sku_id)
        for store_id in range(n_store)
        for sku_id in range(n_sku)
    ]
    return pd.concat(frames, ignore_index=True)

In [None]:
# ----------------------------
# Data loading
# ----------------------------
if DATA_PATH and os.path.exists(DATA_PATH):
    df = pd.read_csv(DATA_PATH)
    if HAS_HEADER_MAPPING:
        # MAPPING is {role: csv_column}. Rename each CSV column to the
        # configured *_COL name for its role, because every later cell
        # addresses columns through those constants. Roles without a
        # configured constant keep the role name itself.
        role_to_col = {
            "date": DATE_COL,
            "store_id": STORE_COL,
            "sku_id": SKU_COL,
            "sales": TARGET_COL,
            "price": PRICE_COL,
            "on_promo": PROMO_COL,
        }
        df = df.rename(columns={csv_name: role_to_col.get(role, role)
                                for role, csv_name in MAPPING.items()})
    # Dtype adjustment
    df[DATE_COL] = pd.to_datetime(df[DATE_COL])
    # Collapse to one row per (store, sku, FREQ period). complete_grid joins
    # on exact period labels, so finer-grained or duplicated rows would
    # otherwise be dropped or multiplied. Quantity is summed, price averaged,
    # and promo is 1 if any row in the period was on promotion.
    # Columns other than these are not carried forward.
    agg_spec = {TARGET_COL: "sum"}
    if PRICE_COL in df.columns:
        agg_spec[PRICE_COL] = "mean"
    if PROMO_COL in df.columns:
        agg_spec[PROMO_COL] = "max"
    df = (
        df.groupby([STORE_COL, SKU_COL, pd.Grouper(key=DATE_COL, freq=FREQ)])
          .agg(agg_spec)
          .reset_index()
    )
else:
    df = make_synthetic()
    print("[INFO] 合成データを生成しました。shape:", df.shape)

# Build the complete (store, sku) x period grid, filling missing periods
def complete_grid(df):
    """Expand `df` to every (store, sku) x period combination and fill gaps.

    Periods run from the minimum to the maximum DATE_COL value at frequency
    FREQ. Rows missing from `df` are added with:
      * PRICE_COL: forward- then backward-filled within each (store, sku)
        series, falling back to the median observed price;
      * PROMO_COL: 0;
      * TARGET_COL: 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain STORE_COL, SKU_COL, DATE_COL and TARGET_COL.

    Returns
    -------
    pandas.DataFrame
        One row per (store, sku, period); grid rows come first in
        series-major, date-minor order.
    """
    series_keys = [STORE_COL, SKU_COL]
    keys = df[series_keys].drop_duplicates()
    all_dates = pd.DataFrame({DATE_COL: pd.date_range(df[DATE_COL].min(), df[DATE_COL].max(), freq=FREQ)})
    grid = keys.merge(all_dates, how="cross")
    df2 = grid.merge(df, on=[STORE_COL, SKU_COL, DATE_COL], how="left")
    # Fill missing values
    if PRICE_COL in df2.columns:
        # Median of the observed prices, taken before any filling.
        fallback_price = df2[PRICE_COL].median()
        # Both fills are grouped so a price can never be borrowed from a
        # neighbouring series, whatever the row order is.
        df2[PRICE_COL] = df2.groupby(series_keys)[PRICE_COL].ffill()
        df2[PRICE_COL] = df2.groupby(series_keys)[PRICE_COL].bfill()
        df2[PRICE_COL] = df2[PRICE_COL].fillna(fallback_price)
    if PROMO_COL in df2.columns:
        df2[PROMO_COL] = df2[PROMO_COL].fillna(0).astype(int)
    df2[TARGET_COL] = df2[TARGET_COL].fillna(0)
    return df2

# Fill the grid, then order rows by series and date; the grouped shift()
# calls in the lag cell take "previous row" to mean "previous period".
df = complete_grid(df)
df = df.sort_values([STORE_COL, SKU_COL, DATE_COL]).reset_index(drop=True)

# Shrink the data by filtering on date:
# keep rows up to 75% of the way through the covered date span
# (roughly three quarters of the frame).
date_min = df[DATE_COL].min()
date_max = df[DATE_COL].max()
date_filter_threshold = date_min + (date_max - date_min) * 0.75
df = df[df[DATE_COL] <= date_filter_threshold].reset_index(drop=True)

[INFO] 合成データを生成しました。shape: (240000, 6)


In [None]:
# ----------------------------
# Calendar features
# ----------------------------
# "dow" is constant for weekly data but becomes informative for daily data.
df = df.assign(
    dow=lambda d: d[DATE_COL].dt.weekday,
    weekofyear=lambda d: d[DATE_COL].dt.isocalendar().week.astype(int),
    month=lambda d: d[DATE_COL].dt.month,
    year=lambda d: d[DATE_COL].dt.year,
)

In [None]:
# Preview the panel after the calendar columns were added.
df.head()

Unnamed: 0,Country,StockCode,date,Quantity,UnitPrice,on_promo,dow,weekofyear,month,year
0,0,0,2022-01-02,25,95.270961,0,6,52,1,2022
1,0,0,2022-01-09,31,111.384538,0,6,1,1,2022
2,0,0,2022-01-16,25,88.407622,0,6,2,1,2022
3,0,0,2022-01-23,26,96.447721,0,6,3,1,2022
4,0,0,2022-01-30,45,87.719547,1,6,4,1,2022


In [None]:
# ----------------------------
# Lags & rolling statistics
# ----------------------------
def add_lags_rolls(df, group_cols, target, lags=None, wins=None):
    """Add per-series lag and rolling-window features of `target`.

    Rows must be in chronological order within each group. Every feature for
    a row uses earlier rows of the same group only: rolling statistics are
    computed on the series shifted by one period, so the current value never
    enters its own window.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is copied, not modified.
    group_cols : list of str
        Columns identifying one series.
    target : str
        Column to derive the features from.
    lags : iterable of int, optional
        Lag distances; defaults to LAG_WEEKS.
    wins : iterable of int, optional
        Rolling window lengths; defaults to ROLL_WINS.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with `{target}_lag{l}`, `{target}_roll{w}_mean` and
        `{target}_roll{w}_std` columns (NaN where history is too short).
    """
    lags = LAG_WEEKS if lags is None else lags
    wins = ROLL_WINS if wins is None else wins

    df = df.copy()
    g = df.groupby(group_cols, group_keys=False)
    for l in lags:
        df[f"{target}_lag{l}"] = g[target].shift(l)
    for w in wins:
        # transform() keeps shift and rolling inside one group, so a window
        # never spans two series even when rows are not group-contiguous.
        df[f"{target}_roll{w}_mean"] = g[target].transform(lambda s: s.shift(1).rolling(w).mean())
        df[f"{target}_roll{w}_std"]  = g[target].transform(lambda s: s.shift(1).rolling(w).std())
    return df

df = add_lags_rolls(df, [STORE_COL, SKU_COL], TARGET_COL)

# One-period lags of price and promotion as well (optional)
if PRICE_COL in df.columns:
    g = df.groupby([STORE_COL, SKU_COL], group_keys=False)
    df[f"{PRICE_COL}_lag1"] = g[PRICE_COL].shift(1)
if PROMO_COL in df.columns:
    g = df.groupby([STORE_COL, SKU_COL], group_keys=False)
    df[f"{PROMO_COL}_lag1"] = g[PROMO_COL].shift(1)

# Drop the warm-up rows: keep each series only from the first period at
# which the longest lag / rolling window is populated.
min_lag = max([0] + LAG_WEEKS + ROLL_WINS)
# cumcount() numbers the rows of each series 0, 1, 2, ... in their current
# order. Filtering on it keeps the same rows as slicing every group with
# iloc[min_lag:], without groupby.apply, whose handling of the grouping
# columns is deprecated in recent pandas releases. Row order is left as is
# (the frame is expected to be sorted by series and date already).
row_in_series = df.groupby([STORE_COL, SKU_COL]).cumcount()
df = df[row_in_series >= min_lag].reset_index(drop=True)


In [None]:
# ----------------------------
# Dataset for the attention encoder
#   - for each (store, sku), the last ATTN_WINDOW periods of sales/price/promo
#     are the input and the next period's sales is the target
#   - after training, the pooled intermediate (context) vector is extracted
#     as a feature
# ----------------------------

# Input channels: the target itself, followed by price and promotion when
# those columns exist in the frame.
INPUT_FEATS_FOR_ATTN = [TARGET_COL] + [
    col for col in (PRICE_COL, PROMO_COL) if col in df.columns
]

class AttnDataset(Dataset):
    """Sliding-window dataset: `window` past rows -> the next row's target.

    For each group (in `frame` row order within the group) and each row i
    from `window` through the last row, one sample is created whose input is
    rows i-window..i-1 of `input_feats` and whose label is `target_col` at
    row i.

    The feature and target values are copied into float32 NumPy arrays at
    construction time, so `__getitem__` is a plain array lookup instead of a
    pandas `.loc` call per sample. Changes made to `frame` afterwards are
    therefore not reflected in the samples.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source rows, already ordered chronologically within each group.
    group_cols : list of str
        Columns identifying one series.
    input_feats : list of str
        Columns forming the input channels.
    target_col : str
        Column holding the value to predict.
    window : int
        Number of past rows per sample.

    Attributes
    ----------
    idxs : list of (ndarray, label)
        Index labels of the history rows and of the target row, per sample.
    """

    def __init__(self, frame, group_cols, input_feats, target_col, window=8):
        self.frame = frame
        self.group_cols = group_cols
        self.input_feats = input_feats
        self.target_col = target_col
        self.window = window

        # Dense snapshots used by __getitem__.
        self._inputs = frame[input_feats].to_numpy(dtype=np.float32)
        self._targets = frame[target_col].to_numpy(dtype=np.float32)

        # Sample index: for each row i, rows i-window..i-1 predict row i.
        self.idxs = []   # index labels (public, same content as before)
        self._pos = []   # matching row positions into the snapshots
        positioned = frame.assign(_row_pos=np.arange(len(frame)))
        for _, g in positioned.groupby(group_cols, group_keys=False):
            labels = g.index.to_numpy()
            pos = g["_row_pos"].to_numpy()
            # From the first row with a full history through the last row.
            for i in range(window, len(g)):
                self.idxs.append((labels[i-window:i], labels[i]))
                self._pos.append((pos[i-window:i], pos[i]))

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, idx):
        """Return (X_seq, y): a (window, n_feats) float32 tensor and a 0-d float32 tensor."""
        hist_pos, tgt_pos = self._pos[idx]
        # Fancy indexing yields a fresh array, so from_numpy does not alias the snapshot.
        X_seq = torch.from_numpy(self._inputs[hist_pos])
        y = torch.tensor(self._targets[tgt_pos], dtype=torch.float32)
        return X_seq, y

class SelfAttentionEncoder(nn.Module):
    """Small Transformer encoder mapping a history window to a forecast and a context vector.

    The input channels are projected to `d_model`, a learned positional
    embedding is added, the sequence goes through `num_layers` Transformer
    encoder layers, and the time steps are mean-pooled into one context
    vector. An MLP head turns that vector into a scalar prediction.

    The positional embedding is what makes the model order-aware: self-attention
    followed by mean pooling gives the same context for any reordering of the
    window, so without it the most recent period is indistinguishable from
    the oldest one.

    Parameters
    ----------
    in_ch : int
        Number of input channels per time step.
    d_model : int
        Width of the encoder and of the returned context vector.
    nhead : int
        Number of attention heads.
    num_layers : int
        Number of encoder layers.
    out_hidden : int
        Hidden width of the prediction head.
    max_len : int
        Longest sequence the positional embedding supports.

    Notes
    -----
    `pos_emb` is an additional parameter, so state dicts saved from a version
    of this class without it need `strict=False` to load.
    """

    def __init__(self, in_ch, d_model=32, nhead=4, num_layers=1, out_hidden=32, max_len=512):
        super().__init__()
        self.proj = nn.Linear(in_ch, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        # Not used by forward(); retained so existing attribute access keeps working.
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.head = nn.Sequential(
            nn.Linear(d_model, out_hidden),
            nn.ReLU(),
            nn.Linear(out_hidden, 1)
        )
        # Created last so the random initialisation of the layers above
        # consumes the RNG exactly as it did before this parameter existed.
        self.max_len = max_len
        self.pos_emb = nn.Parameter(torch.empty(1, max_len, d_model))
        nn.init.normal_(self.pos_emb, std=0.02)

    def forward(self, x):  # x: (B, T, C)
        """Return (yhat, ctx): predictions of shape (B,) and context of shape (B, d_model).

        Raises
        ------
        ValueError
            If the sequence length T exceeds `max_len`.
        """
        seq_len = x.size(1)
        if seq_len > self.max_len:
            raise ValueError(f"sequence length {seq_len} exceeds max_len={self.max_len}")
        h = self.proj(x) + self.pos_emb[:, :seq_len, :]
        z = self.encoder(h)          # (B, T, d_model)
        # mean pooling over time
        ctx = z.mean(dim=1)          # (B, d_model)
        yhat = self.head(ctx).squeeze(-1)
        return yhat, ctx             # ctx is reused as a feature vector


In [None]:
# Data split (stated intent: hold out the last N weeks for validation and test
# to avoid time-series leakage).
# For simplicity the notebook uses GroupKFold; this line only builds the
# per-series group label consumed by it.
# NOTE(review): no trailing-period hold-out is constructed in this cell —
# confirm whether one was intended.
df["_group"] = df[STORE_COL].astype(str) + "_" + df[SKU_COL].astype(str)

In [None]:
# Keep only the columns the attention encoder needs (speed)
attn_cols = [STORE_COL, SKU_COL, DATE_COL] + INPUT_FEATS_FOR_ATTN + [TARGET_COL, "_group"]
# Remove duplicate target column if it exists in INPUT_FEATS_FOR_ATTN
if TARGET_COL in INPUT_FEATS_FOR_ATTN:
  attn_cols = [c for c in attn_cols if c != TARGET_COL] + [TARGET_COL]

df_attn = df[attn_cols].reset_index(drop=True)

dataset = AttnDataset(df_attn, [STORE_COL, SKU_COL], INPUT_FEATS_FOR_ATTN, TARGET_COL, window=ATTN_WINDOW)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = SelfAttentionEncoder(in_ch=len(INPUT_FEATS_FOR_ATTN), d_model=64, nhead=4, num_layers=1, out_hidden=64).to(device)
opt = torch.optim.Adam(model.parameters(), lr=LR)
loss_fn = nn.MSELoss()

# NOTE(review): the encoder is fitted on every series, including those that
# later serve as GroupKFold validation groups — confirm this is acceptable
# for the reported comparison.
print(f"[INFO] Attention pretraining start on {device}, steps={len(loader)}")
model.train()
for epoch in range(AT_EPOCHS):
    running = 0.0
    n_seen = 0
    for X_seq, y in loader:
        X_seq = X_seq.to(device)
        y = y.to(device)
        opt.zero_grad()
        yhat, _ = model(X_seq)
        loss = loss_fn(yhat, y)
        loss.backward()
        opt.step()
        running += loss.item() * X_seq.size(0)
        n_seen += X_seq.size(0)
    # Average over the samples actually processed: drop_last=True skips the
    # final partial batch, so len(dataset) would understate the mean loss.
    # max(..., 1) guards the case of a dataset smaller than one batch.
    print(f"  epoch {epoch+1}/{AT_EPOCHS}  MSE={running/max(n_seen, 1):.4f}")

[INFO] Attention pretraining start on cuda, steps=546
  epoch 1/8  MSE=162.6398
  epoch 2/8  MSE=65.0368
  epoch 3/8  MSE=64.4845
  epoch 4/8  MSE=64.0236
  epoch 5/8  MSE=63.8449
  epoch 6/8  MSE=63.6346
  epoch 7/8  MSE=63.4837
  epoch 8/8  MSE=63.1338


In [None]:
# ----------------------------
# Attention context vectors
# For each row t, build ctx from the sequence t-ATTN_WINDOW..t-1 and attach
# it to row t as features
# ----------------------------
@torch.no_grad()
def build_context_features(frame, input_feats, window, batch=4096):
    """Compute the encoder's context vector for every row that has a full history.

    Uses the module-level `model` (switched to eval mode) and `device`.
    Within each (STORE_COL, SKU_COL) series, rows are ordered by DATE_COL and
    the `window` rows preceding row t form the input whose context vector is
    stored at row t. Windows are pushed through the model `batch` at a time.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain STORE_COL, SKU_COL, DATE_COL and `input_feats`.
    input_feats : list of str
        Input channel columns, in the order the model was trained with.
    window : int
        History length per context vector.
    batch : int
        Number of windows per forward pass.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (len(frame), ctx_dim), row-aligned with `frame`
        by position. Rows without a full history are left as zeros.

    Raises
    ------
    ValueError
        If no series has more than `window` rows.
    """
    model.eval()
    values = frame[input_feats].to_numpy(dtype=np.float32)
    offsets = np.arange(window)

    # Collect row positions (not index labels) of every history window and of
    # the row each window belongs to.
    hist_chunks, tgt_chunks = [], []
    positioned = frame.assign(_row_pos=np.arange(len(frame)))
    for _, g in positioned.groupby([STORE_COL, SKU_COL]):
        pos = g.sort_values(DATE_COL)["_row_pos"].to_numpy()
        n_ctx = len(pos) - window
        if n_ctx <= 0:
            continue  # series too short for a full window
        starts = np.arange(n_ctx)
        hist_chunks.append(pos[starts[:, None] + offsets[None, :]])  # (n_ctx, window)
        tgt_chunks.append(pos[window:])

    if not tgt_chunks:
        raise ValueError(f"no series has more than window={window} rows; cannot build context features")

    hist_pos = np.concatenate(hist_chunks)
    tgt_pos = np.concatenate(tgt_chunks)

    step = max(int(batch), 1)
    ctx_mat = None
    for start in range(0, len(tgt_pos), step):
        rows = hist_pos[start:start + step]                     # (b, window)
        X_seq = torch.from_numpy(values[rows]).to(device)       # (b, window, C)
        _, ctx = model(X_seq)
        ctx = ctx.cpu().numpy().astype(np.float32, copy=False)
        if ctx_mat is None:
            # The width is only known once the model has produced an output.
            ctx_mat = np.zeros((len(frame), ctx.shape[1]), dtype=np.float32)
        ctx_mat[tgt_pos[start:start + step]] = ctx
    return ctx_mat

ctx_mat = build_context_features(df_attn, INPUT_FEATS_FOR_ATTN, ATTN_WINDOW)
ctx_cols = list(map("attn_ctx_{}".format, range(ctx_mat.shape[1])))
ctx_df = pd.DataFrame(ctx_mat, index=df_attn.index, columns=ctx_cols)
df = pd.concat([df.reset_index(drop=True), ctx_df], axis=1)

# Drop the initial periods without context: those rows were left as all-zero
# vectors, so a zero absolute sum marks them.
has_context = df[ctx_cols].abs().sum(axis=1).ne(0)
df = df.loc[has_context].reset_index(drop=True)

In [None]:
# ----------------------------
# Training matrices
# ----------------------------
feature_cols = []

# Numeric features: target lags, target rolling statistics, then the
# one-period price / promotion lags when present.
# Each optional column is appended on its own: a trailing
# `... + [col] if cond else []` applies the condition to the whole sum and
# would silently discard every lag/rolling column when the price lag is absent.
num_cols = [c for c in df.columns if c.startswith(f"{TARGET_COL}_lag")] \
         + [c for c in df.columns if c.startswith(f"{TARGET_COL}_roll")]
if f"{PRICE_COL}_lag1" in df.columns:
    num_cols += [f"{PRICE_COL}_lag1"]
if PROMO_COL in df.columns and f"{PROMO_COL}_lag1" in df.columns:
    num_cols += [f"{PROMO_COL}_lag1"]

# Calendar
cal_cols = ["dow", "weekofyear", "month", "year"]

# Current-period price and promotion (used directly as explanatory variables)
direct_cols = []
if PRICE_COL in df.columns:
    direct_cols.append(PRICE_COL)
if PROMO_COL in df.columns:
    direct_cols.append(PROMO_COL)

# dict.fromkeys removes duplicates while keeping first-seen order.
feature_cols = list(dict.fromkeys(num_cols + cal_cols + direct_cols + ctx_cols))

# Target and design matrix
y = df[TARGET_COL].astype(float).to_numpy()
X = df[feature_cols].astype(float).to_numpy()

# GroupKFold on the (store, sku) label: a series never appears in both the
# training and the validation part of a fold (a pseudo-CV, not a temporal split).
groups = df["_group"].to_numpy()
gkf = GroupKFold(n_splits=5)


In [None]:
def rmse(y_true, y_pred):
    """Return the root mean squared error between `y_true` and `y_pred` as a float."""
    squared_error = mean_squared_error(y_true, y_pred)
    return math.sqrt(squared_error)

def mape(y_true, y_pred, eps=1e-6):
    """Return the mean absolute percentage error, in percent.

    The denominator is max(|y_true|, eps), so zero actuals do not divide by
    zero (they contribute a very large term instead).
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    denominator = np.maximum(np.abs(actual), eps)
    ratios = np.abs((actual - forecast) / denominator)
    return np.mean(ratios)*100

def fit_gbdt(X_tr, y_tr, X_va, y_va):
    """Fit a gradient-boosted regressor with the selected backend and predict on X_va.

    The backend is chosen by the module-level GBDT_BACKEND:
      * "lightgbm" / "xgboost": up to 2000 rounds with early stopping
        (patience 100) monitored on (X_va, y_va); predictions use the
        best iteration.
      * anything else: sklearn HistGradientBoostingRegressor with its own
        internal 10% validation split for early stopping; (X_va, y_va) are
        used for prediction only.

    Parameters
    ----------
    X_tr, y_tr : array-like
        Training features and targets.
    X_va, y_va : array-like
        Validation features and targets.

    Returns
    -------
    (model, pred)
        The fitted model and its predictions for X_va.
    """
    if GBDT_BACKEND == "lightgbm":
        train = lgb.Dataset(X_tr, label=y_tr)
        valid = lgb.Dataset(X_va, label=y_va, reference=train)
        params = dict(
            objective="regression",
            metric="rmse",
            learning_rate=0.05,
            num_leaves=64,
            feature_fraction=0.8,
            bagging_fraction=0.8,
            bagging_freq=1,
            min_data_in_leaf=64,
            seed=SEED
        )
        booster = lgb.train(params, train, valid_sets=[valid], num_boost_round=2000,
                            callbacks=[lgb.early_stopping(100), lgb.log_evaluation(period=0)])
        pred = booster.predict(X_va, num_iteration=booster.best_iteration)
        return booster, pred

    elif GBDT_BACKEND == "xgboost":
        dtr = xgb.DMatrix(X_tr, label=y_tr)
        dva = xgb.DMatrix(X_va, label=y_va)
        params = dict(
            objective="reg:squarederror",
            eval_metric="rmse",
            eta=0.05,
            max_depth=8,
            subsample=0.8,
            colsample_bytree=0.8,
            seed=SEED
        )
        booster = xgb.train(params, dtr, num_boost_round=2000,
                            evals=[(dva, "valid")], early_stopping_rounds=100, verbose_eval=False)
        # iteration_range is half-open and best_iteration is a 0-based index,
        # so +1 is needed to include the best tree. Without it the best round
        # is dropped, and (0, 0) would mean "use all trees".
        pred = booster.predict(dva, iteration_range=(0, booster.best_iteration + 1))
        return booster, pred

    else:
        regressor = HistGradientBoostingRegressor(
            max_depth=None, learning_rate=0.05, max_iter=1000,
            l2_regularization=0.0, early_stopping=True, validation_fraction=0.1, random_state=SEED
        )
        regressor.fit(X_tr, y_tr)
        pred = regressor.predict(X_va)
        return regressor, pred

In [None]:
# ====== CV (with AttnBoost context features) ======
# One model per GroupKFold fold; RMSE and MAPE are collected per fold and
# reported as mean ± standard deviation.
# Note: fit_gbdt monitors early stopping on the same (X_va, y_va) that is
# scored here.
rmse_list, mape_list = [], []
for fold, (tr_idx, va_idx) in enumerate(gkf.split(X, y, groups)):
    X_tr, X_va = X[tr_idx], X[va_idx]
    y_tr, y_va = y[tr_idx], y[va_idx]
    model_gbdt, pred_va = fit_gbdt(X_tr, y_tr, X_va, y_va)
    rmse_list.append(rmse(y_va, pred_va))
    mape_list.append(mape(y_va, pred_va))
print(f"[AttnBoost] CV RMSE: {np.mean(rmse_list):.3f} ± {np.std(rmse_list):.3f}")
print(f"[AttnBoost] CV MAPE: {np.mean(mape_list):.2f}% ± {np.std(mape_list):.2f}%")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102886 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19167
[LightGBM] [Info] Number of data points in the train set: 112000, number of used features: 86
[LightGBM] [Info] Start training from score 28.801946
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[643]	valid_0's rmse: 5.25286
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098561 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19173
[LightGBM] [Info] Number of data points in the train set: 112000, number of used features: 86
[LightGBM] [Info] Start training from score 28.920241
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[533]	valid_0's rmse: 5.21531
[LightGBM] [Info] Auto-choosing col-wise multi-threading

In [None]:
# ====== Baseline (without the context features) ======
# Same folds and targets as the AttnBoost run; only the attn_ctx_* columns
# are removed from the design matrix.
feature_cols_base = [c for c in feature_cols if not c.startswith("attn_ctx_")]
Xb = df[feature_cols_base].astype(float).to_numpy()

rmse_b_list, mape_b_list = [], []
for fold, (tr_idx, va_idx) in enumerate(gkf.split(Xb, y, groups)):
    X_tr, X_va = Xb[tr_idx], Xb[va_idx]
    y_tr, y_va = y[tr_idx], y[va_idx]
    _, pred_va = fit_gbdt(X_tr, y_tr, X_va, y_va)
    rmse_b_list.append(rmse(y_va, pred_va))
    mape_b_list.append(mape(y_va, pred_va))
print(f"[Baseline] CV RMSE: {np.mean(rmse_b_list):.3f} ± {np.std(rmse_b_list):.3f}")
print(f"[Baseline] CV MAPE: {np.mean(mape_b_list):.2f}% ± {np.std(mape_b_list):.2f}%")

# Difference of the fold means (AttnBoost minus baseline); a negative value
# means the context features helped.
print("\n[Δ 改善(AttnBoost - Baseline)]")
print(f"RMSE: {np.mean(rmse_list)-np.mean(rmse_b_list):.3f}  (負なら改善)")
print(f"MAPE: {np.mean(mape_list)-np.mean(mape_b_list):.2f}% (負なら改善)")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007949 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2847
[LightGBM] [Info] Number of data points in the train set: 112000, number of used features: 22
[LightGBM] [Info] Start training from score 28.801946
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[528]	valid_0's rmse: 5.24952
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.028982 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2853
[LightGBM] [Info] Number of data points in the train set: 112000, number of used features: 22
[LightGBM] [Info] Start training from score 28.920241
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[485]	valid_0's rmse: 5.

In [None]:
# ----------------------------
# Future forecast (simplified: predict from the most recent feature rows)
# In production, planned future prices/promotions must be supplied or
# generated as scenarios
# ----------------------------
def one_step_forecast(last_df, model, horizon=1, backend=GBDT_BACKEND):
    """Predict the target for the rows of `last_df` with a fitted GBDT model.

    Parameters
    ----------
    last_df : pandas.DataFrame
        One row per series, containing every column in the module-level
        `feature_cols`.
    model : object
        Model returned by fit_gbdt for the given backend.
    horizon : int
        Accepted for interface compatibility; not used by this function.
    backend : str
        "lightgbm", "xgboost" or anything else for the sklearn-style API.

    Returns
    -------
    numpy.ndarray
        One prediction per row of `last_df`.
    """
    # Simplifying assumption: the latest rows follow the training distribution.
    X_in = last_df[feature_cols].astype(float).to_numpy()
    if backend == "lightgbm":
        yhat = model.predict(X_in, num_iteration=getattr(model, "best_iteration", None))
    elif backend == "xgboost":
        d = xgb.DMatrix(X_in)
        best_iteration = getattr(model, "best_iteration", None)
        if best_iteration is None:
            # Trained without early stopping: use every tree.
            yhat = model.predict(d)
        else:
            # iteration_range is half-open; +1 includes the best (0-based) round.
            yhat = model.predict(d, iteration_range=(0, best_iteration + 1))
    else:
        yhat = model.predict(X_in)
    return yhat

In [None]:
# Take the last week in the training data and run a pseudo-forecast from it
last_week = df[DATE_COL].max()
base_last = df[df[DATE_COL] == last_week].copy()
# Refit the AttnBoost model on all data.
# Note: the training set is also passed as the validation set, so early
# stopping is monitored on training error.
model_full, _ = fit_gbdt(X, y, X, y)

forecasts = []
cur_frame = base_last.copy()
# Note: the feature columns of cur_frame are not updated between iterations
# (only a pred_t+h column is added), so every horizon receives the same
# prediction from the same inputs.
for h in range(1, FORECAST_HORIZON_WEEKS+1):
    yhat = one_step_forecast(cur_frame, model_full, backend=GBDT_BACKEND)
    cur_frame[f"pred_t+{h}"] = yhat
    forecasts.append(cur_frame[[STORE_COL, SKU_COL, f"pred_t+{h}"]])

# Example output: predictions at the final horizon, summed per store
pred_out = forecasts[-1].copy()  # final horizon
pred_agg = pred_out.groupby([STORE_COL])[f"pred_t+{FORECAST_HORIZON_WEEKS}"].sum().reset_index()
print("\n[予測例] 最終ホライズンの店舗合計（上位10）")
print(pred_agg.sort_values(by=f"pred_t+{FORECAST_HORIZON_WEEKS}", ascending=False).head(10))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.117229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19174
[LightGBM] [Info] Number of data points in the train set: 140000, number of used features: 86
[LightGBM] [Info] Start training from score 28.875086
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[2000]	valid_0's rmse: 3.83666

[予測例] 最終ホライズンの店舗合計（上位10）
    Country     pred_t+4
6         6  3865.210904
1         1  3836.430544
3         3  3799.960449
4         4  3782.738577
11       11  3781.748168
14       14  3780.760371
12       12  3778.890554
10       10  3758.669656
13       13  3746.231159
19       19  3687.556596
