In [None]:
# === Notebook bootstrap: make repo root importable ===
import sys
from pathlib import Path

# Search the working directory and then each of its ancestors, nearest first,
# for the first directory that contains a "src" entry.
_cwd = Path.cwd().resolve()
_repo_root = next(
    (candidate for candidate in (_cwd, *_cwd.parents) if (candidate / "src").exists()),
    None,
)

# Put that directory at the front of sys.path unless it is already listed,
# so that `src.*` modules can be imported from later cells.
if _repo_root is not None and str(_repo_root) not in sys.path:
    sys.path.insert(0, str(_repo_root))

print("cwd:", _cwd)
print("sys.path[0]:", sys.path[0])


In [None]:
#01
# Purpose: Explore reasonable lag range per channel via log-log lag correlation (EDA)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.io_utils import load_pickle, outputs_dir

# Build the path of the pickled df_w frame inside the outputs directory.
# (Change the file name here to use df_w_feat.pkl instead.)
OUT_DIR = outputs_dir()
DF_W_PATH = Path(OUT_DIR).joinpath("df_w.pkl")

df_w = load_pickle(DF_W_PATH)

# Quick sanity check: dimensions plus the first three rows.
print("df_w shape:", df_w.shape)
display(df_w.head(n=3))


In [None]:
#02
# Log transform (log1p, so zero values are handled)
# Build a new frame from df_w with one log1p column appended per raw column;
# df_w itself is left untouched.
df_lag = df_w.assign(
    **{
        log_col: np.log1p(df_w[raw_col])
        for log_col, raw_col in (
            ("log_sales", "sales"),
            ("log_online", "online_spend"),
            ("log_broadcast", "broadcast_spend"),
            ("log_ooh", "ooh_print_spend"),
        )
    }
)

# Summary statistics of the transformed columns.
display(df_lag[["log_sales", "log_online", "log_broadcast", "log_ooh"]].describe())


In [None]:
#03
# Lag-correlation helper
def lag_corr(x: pd.Series, y: pd.Series, lag: int) -> float:
    """Correlate ``x`` shifted by ``lag`` periods with ``y``.

    ``x`` is shifted with ``Series.shift(lag)``, placed side by side with
    ``y`` (aligned on index), and every row in which either value is
    missing is discarded before the correlation is computed.

    Args:
        x: Series to shift.
        y: Series left unshifted.
        lag: Number of periods passed to ``Series.shift``; a positive value
            pairs ``y`` at position t with ``x`` at position t - lag.

    Returns:
        The result of ``Series.corr`` (Pearson by default) as a Python
        float; NaN when it cannot be computed, e.g. when no rows overlap.
    """
    paired = pd.concat(
        [x.shift(lag), y], axis=1, keys=["lagged", "target"]
    ).dropna()
    return float(paired["lagged"].corr(paired["target"]))

In [None]:
#04
# Lag correlation of each media channel against sales
max_lag = 12
lags = range(max_lag + 1)

# Same computation for every channel: correlate its log spend, shifted by each
# lag in `lags`, with log sales. The outer order matches the unpacking targets.
corr_online, corr_broadcast, corr_ooh = [
    [lag_corr(df_lag[spend_col], df_lag["log_sales"], lag) for lag in lags]
    for spend_col in ("log_online", "log_broadcast", "log_ooh")
]


In [None]:
#05
# Lag-correlation plot
plt.figure(figsize=(10, 5))

# One line per channel, drawn in this order (which also fixes the legend order).
for channel_corr, channel_label, channel_color in (
    (corr_broadcast, "TV + Radio × Sales", "#4DA9CD"),
    (corr_ooh, "OOH / Print × Sales", "#F1CD46"),
    (corr_online, "Online × Sales", "#8453F6"),
):
    plt.plot(lags, channel_corr, marker="o", label=channel_label, color=channel_color)

# Dashed zero line as a visual reference for the sign of the correlation.
plt.axhline(0, linestyle="--", color="gray", linewidth=1)

plt.title("Lag correlation with Sales (Weekly, log-log)")
plt.xlabel("Lag (weeks)")
plt.ylabel("Correlation (log-log)")
plt.legend()
plt.grid(False)

plt.tight_layout()
plt.show()


### Interpretation

- Online spend shows a relatively strong correlation with sales even with several weeks of lag,
  suggesting a short- to mid-term carryover effect.
- Broadcast (TV/Radio) correlation decays more quickly, indicating a more immediate impact.
- OOH/Print shows weak or negative correlations across lags, implying limited short-term carryover.

Based on these observations, the next step will explore decay parameters for each channel
over a range consistent with these lag patterns, using AIC-based optimization.
