In [None]:
# === Notebook bootstrap: make repo root importable ===
import sys
from pathlib import Path

_cwd = Path.cwd().resolve()

# Search the working directory first, then each ancestor; the first directory
# that contains a "src" entry is treated as the repository root.
_repo_root = next(
    (candidate for candidate in (_cwd, *_cwd.parents) if (candidate / "src").exists()),
    None,
)

# Prepend the root only once so re-running this cell does not grow sys.path.
if _repo_root is not None and str(_repo_root) not in sys.path:
    sys.path.insert(0, str(_repo_root))

print("cwd:", _cwd)
print("sys.path[0]:", sys.path[0])


In [None]:
# 01
# Imports and run configuration for this notebook.
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from src.io_utils import load_pickle, save_pickle, outputs_dir
from src.features import build_log_ad_from_spend  # (geometric_adstock: leave it out of this import if it is not used directly here)
from src.modeling import build_X_for_model        # <- assumes this was made the canonical implementation in Step 4 (important)

# Artifact directory as returned by the project helper.
OUT_DIR = outputs_dir()

# Inputs read by this notebook: the weekly feature table and the fitted model.
IN_PATH = Path(OUT_DIR) / "df_w_feat.pkl"
MODEL_PATH = Path(OUT_DIR) / "final_model.pkl"

# Output locations. WEEKLY_SCORE_PATH is written by the final save cell.
# NOTE(review): DF_WITH_ROI_PATH is not referenced by any cell visible here.
WEEKLY_SCORE_PATH = Path(OUT_DIR) / "weekly_score.pkl"
DF_WITH_ROI_PATH  = Path(OUT_DIR) / "df_w_with_roi.pkl"

# NOTE(review): ADSTOCK_TYPE is not referenced by any cell visible here.
ADSTOCK_TYPE = "geometric"
# Per-channel decay values passed as the second argument of build_log_ad_from_spend.
D_ONLINE    = 0.50
D_BROADCAST = 0.00
D_OOH       = 0.85

# Spend increment added to a channel's spend when computing the finite-difference ROI.
DELTA_SPEND = 100.0


In [None]:
# 02
# Load the weekly feature table and the fitted model from the artifact directory.
# NOTE(review): load_pickle presumably unpickles the file; only point it at trusted artifacts.
df_w = load_pickle(IN_PATH).copy()
final_model = load_pickle(MODEL_PATH)
# The exog names printed here are reused later as the column list when building X.
print("Model exog names:", final_model.model.exog_names)


In [None]:
# 03
# Prepare base features: one log-adstock column per channel, derived from its spend column.
for feature_col, spend_col, decay in [
    ("log_ad_online",    "online_spend",    D_ONLINE),
    ("log_ad_broadcast", "broadcast_spend", D_BROADCAST),  # decay=0
    ("log_ad_ooh",       "ooh_print_spend", D_OOH),
]:
    df_w[feature_col] = build_log_ad_from_spend(df_w[spend_col], decay)

# Build the model input X using the model's own exog column list.
EXOG = final_model.model.exog_names
X = build_X_for_model(df_w, EXOG)
display(X.head())

In [None]:
# 04
# =========================
# Functions (local to this notebook)
# =========================

def predict_sales(model, X_input: pd.DataFrame) -> np.ndarray:
    """Return predictions on the sales scale.

    ``model.predict(X_input)`` is treated as a log1p-scale prediction and is
    mapped back with ``np.expm1``. The result keeps whatever container type
    ``model.predict`` produced (``np.expm1`` is applied element-wise).
    """
    return np.expm1(model.predict(X_input))


def channel_contrib_by_diff(
    model,
    X_full: pd.DataFrame,
    sales_hat_full: np.ndarray,
    channel_col: str
) -> np.ndarray:
    """
    Channel contribution by the difference method.

    The channel's column is set to 0.0 on a copy of ``X_full`` (the caller's
    frame is left untouched), sales are re-predicted, and the result is
    ``sales_hat_full - sales_hat_without_channel``.

    If ``channel_col`` is not a column of ``X_full`` the copy is predicted
    unchanged, so the result is ``sales_hat_full`` minus the plain prediction.
    """
    X_without = X_full.copy()
    channel_present = channel_col in X_without.columns
    if channel_present:
        X_without[channel_col] = 0.0
    return sales_hat_full - predict_sales(model, X_without)


def build_X_with_log_ads(
    df_base: pd.DataFrame,
    log_online: np.ndarray,
    log_broadcast: np.ndarray,
    log_ooh: np.ndarray,
    exog_names
) -> pd.DataFrame:
    """Rebuild the model input after swapping in the given log_ad_* columns.

    The three columns are set on a copy of ``df_base`` (the input frame is not
    modified), and the copy is handed to ``build_X_for_model`` together with
    ``exog_names``.
    """
    df_swapped = df_base.assign(
        log_ad_online=log_online,
        log_ad_broadcast=log_broadcast,
        log_ad_ooh=log_ooh,
    )
    return build_X_for_model(df_swapped, exog_names)


def plot_timing(weekly_score: pd.DataFrame, roi_col: str, score_col: str, title: str) -> None:
    """Draw the timing chart for one channel and show it.

    Baseline demand (``baseline_hat``) goes on the left axis; the ROI column
    and the score column share a twin right axis. All three series are plotted
    against ``week_of_year``. Both axes' legend entries are merged into a
    single legend on the left axis.
    """
    weeks = weekly_score["week_of_year"]

    fig, demand_ax = plt.subplots(figsize=(10, 4))
    demand_ax.plot(
        weeks,
        weekly_score["baseline_hat"],
        linestyle=":",
        linewidth=2,
        label="Baseline demand",
        color="#FF897C"
    )
    demand_ax.set_xlabel("Week of year")
    demand_ax.set_ylabel("Baseline demand")

    # Second y-axis sharing the same x-axis for the ROI and score series.
    roi_ax = demand_ax.twinx()
    roi_ax.plot(weeks, weekly_score[roi_col], linestyle="--", linewidth=2, label="ROI", color="gray")
    roi_ax.plot(weeks, weekly_score[score_col], linewidth=2, color="#A6CE25", label="Demand × ROI")
    roi_ax.set_ylabel("ROI / Score")

    # Collect legend entries from both axes, left axis first.
    handles, labels = [], []
    for axis in (demand_ax, roi_ax):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles.extend(axis_handles)
        labels.extend(axis_labels)
    demand_ax.legend(handles, labels, loc="upper left")

    for axis in (demand_ax, roi_ax):
        axis.grid(False)

    plt.title(title)
    plt.tight_layout()
    plt.show()




In [None]:
# 06
# baseline_hat: prediction for a zero-advertising world
# (every log_ad_* column that X actually has is replaced by 0.0 on a copy of X).
ad_feature_cols = ["log_ad_online", "log_ad_broadcast", "log_ad_ooh"]
X_baseline = X.assign(**{col: 0.0 for col in ad_feature_cols if col in X.columns})

df_w["baseline_hat"] = predict_sales(final_model, X_baseline)
display(df_w[["Week","sales","baseline_hat"]].head())


In [None]:
# 07
# Full prediction (with the media features as they are in X)
df_w["sales_hat"] = predict_sales(final_model, X)

# Difference from the baseline = total media contribution
df_w["media_hat_total"] = df_w["sales_hat"] - df_w["baseline_hat"]

# Per-channel contribution (difference method); channels missing from X are skipped
sales_hat_full = df_w["sales_hat"].values

contrib_col_by_feature = {
    "log_ad_online": "media_hat_online",
    "log_ad_broadcast": "media_hat_broadcast",
    "log_ad_ooh": "media_hat_ooh",
}
for feature_col, contrib_col in contrib_col_by_feature.items():
    if feature_col in X.columns:
        df_w[contrib_col] = channel_contrib_by_diff(final_model, X, sales_hat_full, feature_col)

# Show only the columns that were actually created above.
cols_show = [
    c
    for c in [
        "Week", "sales", "baseline_hat", "sales_hat", "media_hat_total",
        "media_hat_online", "media_hat_broadcast", "media_hat_ooh",
    ]
    if c in df_w.columns
]
display(df_w[cols_show].head())

In [None]:
# 08
# Marginal ROI per channel (finite difference on spend) and the weekly "demand x ROI" score.
#
# For each channel: add DELTA_SPEND to that channel's spend, rebuild its log-adstock
# feature, re-predict sales with the other channels held at their base values, and
# divide the incremental sales by DELTA_SPEND.

# A zero increment would make every ROI 0/0 = NaN and silently break the ranking downstream.
if DELTA_SPEND == 0:
    raise ValueError("DELTA_SPEND must be non-zero to compute a finite-difference ROI.")

# (channel key, spend column, decay). The order fixes the order of the columns added to df_w.
CHANNEL_SPECS = [
    ("online",    "online_spend",    D_ONLINE),
    ("broadcast", "broadcast_spend", D_BROADCAST),
    ("ooh",       "ooh_print_spend", D_OOH),
]

# base (regenerated from spend)
log_ad_base = {
    channel: build_log_ad_from_spend(df_w[spend_col], decay)
    for channel, spend_col, decay in CHANNEL_SPECS
}
X_base = build_X_with_log_ads(
    df_w, log_ad_base["online"], log_ad_base["broadcast"], log_ad_base["ooh"], EXOG
)
sales_base = predict_sales(final_model, X_base)

# channel + delta, one scenario per channel
for channel, spend_col, decay in CHANNEL_SPECS:
    log_ad_scenario = dict(log_ad_base)  # shallow copy: only this channel's entry is swapped
    log_ad_scenario[channel] = build_log_ad_from_spend(df_w[spend_col] + DELTA_SPEND, decay)
    X_plus = build_X_with_log_ads(
        df_w,
        log_ad_scenario["online"],
        log_ad_scenario["broadcast"],
        log_ad_scenario["ooh"],
        EXOG,
    )
    sales_plus = predict_sales(final_model, X_plus)
    df_w[f"inc_sales_{channel}"] = sales_plus - sales_base
    df_w[f"roi_{channel}"] = df_w[f"inc_sales_{channel}"] / DELTA_SPEND

# week_of_year (an existing column takes priority)
if "week_of_year" in df_w.columns:
    df_w["week_of_year"] = df_w["week_of_year"].astype(int)
elif "iso_week" in df_w.columns:
    df_w["week_of_year"] = df_w["iso_week"].astype(int)
else:
    # Coerce first: the .dt accessor raises if "Week" was loaded as strings/objects.
    df_w["week_of_year"] = pd.to_datetime(df_w["Week"]).dt.isocalendar().week.astype(int)

# demand × ROI
for channel, _, _ in CHANNEL_SPECS:
    df_w[f"score_{channel}"] = df_w["baseline_hat"] * df_w[f"roi_{channel}"]

# Average every metric over all rows that share the same week of year.
weekly_score = (
    df_w.groupby("week_of_year")[[
        "baseline_hat",
        "roi_online","roi_broadcast","roi_ooh",
        "score_online","score_broadcast","score_ooh"
    ]]
    .mean()
    .reset_index()
    .sort_values("week_of_year")
)

display(weekly_score.head())


In [None]:
# 09
# Best week per channel: the weekly_score row whose score column is largest
top_online, top_broadcast, top_ooh = (
    weekly_score.loc[weekly_score[score_col].idxmax()]
    for score_col in ("score_online", "score_broadcast", "score_ooh")
)

print("=== Best week by 'Demand × ROI' ===")
print(f"Online    : week {int(top_online['week_of_year'])}, score={top_online['score_online']:.3f}, ROI={top_online['roi_online']:.6f}")
print(f"Broadcast : week {int(top_broadcast['week_of_year'])}, score={top_broadcast['score_broadcast']:.3f}, ROI={top_broadcast['roi_broadcast']:.6f}")
print(f"OOH/Print : week {int(top_ooh['week_of_year'])}, score={top_ooh['score_ooh']:.3f}, ROI={top_ooh['roi_ooh']:.6f}")


In [None]:
# 10
# Plot: one timing chart per channel, in the order online, broadcast, OOH/print
for roi_col, score_col, chart_title in [
    ("roi_online", "score_online", "Timing optimization (Online)"),
    ("roi_broadcast", "score_broadcast", "Timing optimization (Broadcast)"),
    ("roi_ooh", "score_ooh", "Timing optimization (OOH/Print)"),
]:
    plot_timing(weekly_score, roi_col, score_col, chart_title)

In [None]:
# 11
# Save (contract deliverables only)
# Only weekly_score is written here; df_w itself is not saved by this cell.
save_pickle(weekly_score, WEEKLY_SCORE_PATH)

print("Saved:", WEEKLY_SCORE_PATH)


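### NOTE:
This model is a log-log MMM:
- Target: y = log1p(sales)
- Media variables: log1p(adstock(spend))

### Therefore:
- The ROI computed here is a marginal (local) ROI, not an average ROI: it is the incremental predicted sales from adding `DELTA_SPEND` to a channel's spend, divided by `DELTA_SPEND`.
- ROI varies with the baseline demand level, because a model that is linear on the log scale is multiplicative on the sales scale.
- "Demand × ROI" is a theoretically consistent timing score.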
