In [None]:
# === Notebook bootstrap: make repo root importable ===
import sys
from pathlib import Path

# Resolve the working directory once; the search below walks from it upward.
_cwd = Path.cwd().resolve()

# The first directory (cwd itself, then each ancestor) that contains an entry
# named "src" is treated as the repo root. None means no such directory exists.
_repo_root = next(
    (candidate for candidate in (_cwd, *_cwd.parents) if (candidate / "src").exists()),
    None,
)

# Put the repo root at the front of sys.path unless it is already listed.
if _repo_root is not None and str(_repo_root) not in sys.path:
    sys.path.insert(0, str(_repo_root))

print("cwd:", _cwd)
print("sys.path[0]:", sys.path[0])


In [None]:
# 01
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns

from src.io_utils import load_pickle, save_pickle, outputs_dir
from src.features import build_log_ad_from_spend
from src.modeling import build_X_for_model

# --- Paths -------------------------------------------------------------------
OUT_DIR = outputs_dir()

# Input feature frame and artifact locations, all directly under OUT_DIR.
IN_PATH = Path(OUT_DIR).joinpath("df_w_feat.pkl")
MODEL_PATH = Path(OUT_DIR).joinpath("final_model.pkl")
WEEKLY_SCORE_PATH = Path(OUT_DIR).joinpath("weekly_score.pkl")

# --- Adstock / scenario constants --------------------------------------------
# NOTE(review): ADSTOCK_TYPE, D_ONLINE, D_BROADCAST, D_OOH, DELTA_SPEND,
# MODEL_PATH and WEEKLY_SCORE_PATH are not referenced by the cells visible in
# this notebook (the decays actually used come from the grid search) -- confirm
# whether they are still needed here.
ADSTOCK_TYPE = "geometric"
D_ONLINE, D_BROADCAST, D_OOH = 0.50, 0.00, 0.85

DELTA_SPEND = 100.0

# --- Load --------------------------------------------------------------------
# Work on a copy of the loaded object so later column additions do not touch it.
df = load_pickle(IN_PATH).copy()
print("Loaded:", IN_PATH, "shape:", df.shape)

In [None]:
# 02
# Validate the input columns, then build the target variable.
#
# The check runs BEFORE "sales" is read, so a missing input column is reported
# through the explicit ValueError below instead of a bare KeyError from
# df["sales"].
input_cols = [
    "online_spend", "broadcast_spend", "ooh_print_spend",
    "t", "sin_1", "cos_1", "sin_2", "cos_2",
]
# Columns the downstream model fit reads; "log_sales" is derived in this cell.
required = [*input_cols, "log_sales"]

missing = [c for c in ["sales", *input_cols] if c not in df.columns]
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# Target: log(1 + sales).
df["log_sales"] = np.log1p(df["sales"])

print("OK: required columns exist.")
display(df[required].head(3))


In [None]:
# 03
# Geometric Adstock & log_ad の関数
def geometric_adstock(x, decay: float):
    """Apply a geometric (exponentially decaying) carryover to a sequence.

    The result follows ``out[i] = x[i] + decay * out[i - 1]`` with the
    carryover before the first element taken as 0.

    Parameters
    ----------
    x : array-like
        Input values; converted to a float array. NaN entries are treated as
        0 (``np.nan_to_num`` with its defaults also maps +/-inf to large
        finite values).
    decay : float
        Carryover rate; must satisfy ``0 <= decay <= 1``.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as the converted input.

    Raises
    ------
    ValueError
        If ``decay`` is outside ``[0, 1]`` (a NaN decay is rejected as well).
    """
    # Chained comparison on purpose: it is False for NaN, so NaN is rejected.
    in_unit_interval = 0.0 <= float(decay) <= 1.0
    if not in_unit_interval:
        raise ValueError(f"decay must be in [0,1], got {decay}")

    spend = np.nan_to_num(np.asarray(x, dtype=float), nan=0.0)
    stock = np.zeros_like(spend, dtype=float)

    # Running carryover: each step adds the new value to the decayed previous total.
    running = 0.0
    for idx, value in enumerate(spend):
        running = value + decay * running
        stock[idx] = running

    return stock


def build_log_ad_from_spend(spend_series, decay: float):
    """Return ``log(1 + adstock)`` for a spend sequence.

    The spend values are converted to a float array, passed through
    ``geometric_adstock`` with the given ``decay``, and transformed with
    ``np.log1p``.

    NOTE(review): a function with this same name is also imported from
    ``src.features`` in the setup cell; once this cell runs, this local
    definition shadows the imported one -- confirm which is intended.
    """
    spend = np.asarray(spend_series, dtype=float)
    adstocked = geometric_adstock(spend, decay)
    return np.log1p(adstocked)


In [None]:
# 04
# 1組のdecayでOLSを当ててAICを返す関数
# NOTE: const is manually added here; do not use sm.add_constant elsewhere for this X.
def fit_aic_for_decays(df, d_online, d_broadcast, d_ooh):
    """Fit one OLS model for a given decay triple and return its AIC.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the three spend columns, the controls ``t``, ``sin_1``,
        ``cos_1``, ``sin_2``, ``cos_2``, and the target ``log_sales``.
    d_online, d_broadcast, d_ooh : float
        Decay passed to ``build_log_ad_from_spend`` for the respective channel.

    Returns
    -------
    tuple
        ``(aic, results)``: the AIC as a Python float and the fitted
        statsmodels results object.
    """
    # (regressor name, spend column, decay) in design-matrix column order.
    media_spec = (
        ("log_ad_online", "online_spend", d_online),
        ("log_ad_broadcast", "broadcast_spend", d_broadcast),
        ("log_ad_ooh", "ooh_print_spend", d_ooh),
    )

    # The intercept is added by hand as a constant column (no sm.add_constant).
    design = {"const": 1.0}
    for regressor, spend_col, decay in media_spec:
        design[regressor] = build_log_ad_from_spend(df[spend_col].values, decay)
    for control in ("t", "sin_1", "cos_1", "sin_2", "cos_2"):
        design[control] = df[control].values

    X = pd.DataFrame(design)
    target = df["log_sales"].values

    results = sm.OLS(target, X).fit()
    return float(results.aic), results


In [None]:
# 05
# Search grids: candidate decay values for each channel.
# (Original note: the ranges were chosen to follow the lag-analysis results.)
# Six candidates per channel -> 6 * 6 * 6 = 216 combinations in the
# exhaustive search that iterates over these lists.
grid_online = [0.0, 0.3, 0.5, 0.7, 0.85, 0.95]
grid_broadcast = [0.0, 0.2, 0.4, 0.6, 0.7, 0.85]
grid_ooh = [0.0, 0.1, 0.3, 0.5, 0.7, 0.85]


In [None]:
# 06
# Exhaustive grid search: fit one model per decay combination and collect the
# AICs in df_decay (sorted ascending, best combination in row 0).
rows = []
best = {"AIC": np.inf}

for d_ooh in grid_ooh:
    for d_broadcast in grid_broadcast:
        for d_online in grid_online:
            # Only the AIC is needed here; the fitted model is discarded.
            # (Bound to a named throwaway rather than "_", which IPython uses
            # for the last cell output.)
            aic, _fit = fit_aic_for_decays(df, d_online, d_broadcast, d_ooh)
            row = {
                "d_online": d_online,
                "d_broadcast": d_broadcast,
                "d_ooh": d_ooh,
                "AIC": aic,
            }
            rows.append(row)
            # Strict "<" keeps the first-encountered combination on ties.
            if aic < best["AIC"]:
                best = dict(row)

# Stable sort so that tied AICs keep their search order. Row 0 of df_decay is
# then the same combination as `best`, instead of an arbitrary tied row picked
# by the default (unstable) sort.
df_decay = (
    pd.DataFrame(rows)
    .sort_values("AIC", kind="mergesort")
    .reset_index(drop=True)
)

print("Best (min AIC):", best)
display(df_decay.head(10))


In [None]:
# 07
# AIC heatmap over (OOH decay x Online decay), with the broadcast decay held
# at the value found in the top-ranked row of df_decay.
broadcast_fixed = float(df_decay["d_broadcast"].iloc[0])

# Keep only the grid points that share the fixed broadcast decay.
df_slice = df_decay.loc[df_decay["d_broadcast"].eq(broadcast_fixed)].copy()

pivot = pd.pivot_table(
    df_slice,
    index="d_ooh",
    columns="d_online",
    values="AIC",
    aggfunc="min",
).sort_index(ascending=True)

fig, ax = plt.subplots(figsize=(7, 5))
sns.heatmap(
    pivot,
    annot=True,
    fmt=".1f",
    cmap="coolwarm",
    cbar_kws={"label": "AIC"},
    ax=ax,
)

ax.set_title(f"AIC heatmap (broadcast fixed = {broadcast_fixed})")
ax.set_xlabel("Online decay")
ax.set_ylabel("OOH decay")
fig.tight_layout()
plt.show()


In [None]:
# 08
# Fix the final model: refit using the decay combination in the top-ranked
# row of df_decay.
best_row = df_decay.iloc[0]
d_online, d_broadcast, d_ooh = (
    float(best_row[key]) for key in ("d_online", "d_broadcast", "d_ooh")
)

aic, final_model = fit_aic_for_decays(df, d_online, d_broadcast, d_ooh)

print("Selected decays:")
for label, value in (
    ("  d_online    =", d_online),
    ("  d_broadcast =", d_broadcast),
    ("  d_ooh       =", d_ooh),
):
    print(label, value)
print("AIC:", aic)

print(final_model.summary())


In [None]:
# 09
# Save (only the contract artifacts, collected under the outputs directory):
# - best_decay.pkl
# - final_model.pkl

best_payload = dict(
    d_online=float(d_online),
    d_broadcast=float(d_broadcast),
    d_ooh=float(d_ooh),
    AIC=float(aic),
)

# (object, destination) pairs; everything is written first, then reported.
artifacts = (
    (best_payload, Path(OUT_DIR) / "best_decay.pkl"),
    (final_model, Path(OUT_DIR) / "final_model.pkl"),
)

for obj, target in artifacts:
    save_pickle(obj, target)

for _obj, target in artifacts:
    print("Saved:", target)


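- We selected the decay parameters that minimize AIC over the search grid. AIC balances model fit against complexity in general, but every candidate here has the same set of regressors (only the decay values differ), so within this grid the lowest AIC is simply the best-fitting model.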
- The selected decays are consistent with the lag-correlation exploration (Online shows longer carryover than Broadcast; OOH is limited).