In [None]:
# === Notebook bootstrap: make repo root importable ===
import sys
from pathlib import Path

# Walk upward from the working directory. The first directory that contains a
# "src" entry is treated as the repo root and is put at the front of sys.path
# (unless it is already listed somewhere in sys.path).
_cwd = Path.cwd().resolve()
for p in (_cwd, *_cwd.parents):
    if not (p / "src").exists():
        continue  # not the repo root, keep climbing
    if str(p) not in sys.path:
        sys.path.insert(0, str(p))
    break  # only the nearest match is used

# Echo the result so a wrong working directory is obvious in the cell output.
print("cwd:", _cwd)
print("sys.path[0]:", sys.path[0])


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.io_utils import load_pickle, outputs_dir

# outputs_dir() is assumed to return a Path; it is wrapped in Path() below
# before the file name is appended.
OUT_DIR = outputs_dir()
DF_W_PATH = Path(OUT_DIR).joinpath("df_w.pkl")

# Load the pickled weekly frame via the project helper.
# NOTE(review): pickle files should only be loaded from trusted locations.
df_w = load_pickle(DF_W_PATH)

# Quick structural overview: shape, column dtypes, first three rows.
print("df_w shape:", df_w.shape)
print("dtypes:\n", df_w.dtypes)
display(df_w.head(3))


In [None]:
#02
# Minimal precondition check.
# Build an EDA-only frame (df_w itself is never modified): parse "Week" as
# datetime (raising on unparseable values), order rows by week, and renumber
# the index from 0.
df_eda = (
    df_w.copy()
    .assign(Week=lambda frame: pd.to_datetime(frame["Week"], errors="raise"))
    .sort_values("Week")
    .reset_index(drop=True)
)

# Report the covered date range and confirm the ordering.
print(df_eda["Week"].min(), "→", df_eda["Week"].max())
print("is_monotonic:", df_eda["Week"].is_monotonic_increasing)


In [None]:
# 03
# Correlation heatmap: media EDA

def plot_corr_heatmap(df, title):
    """Draw and show an annotated heatmap of the pairwise correlations of ``df``.

    Args:
        df: DataFrame whose ``df.corr()`` matrix is plotted.
        title: Text placed as the plot title.

    Returns:
        None. The figure is rendered through ``plt.show()``.
    """
    label_size = 14
    heatmap_style = dict(
        vmin=-1.0,  # pin the colour scale to the full correlation range
        vmax=1.0,
        cmap=plt.cm.RdBu_r,
        linecolor="white",
        linewidths=0.1,
        annot=True,
        fmt=".2f",
        annot_kws={"fontsize": label_size},
    )

    # The figure is created first so seaborn draws onto it.
    plt.figure(figsize=(10, 5))
    ax = sns.heatmap(df.corr(), **heatmap_style)

    # Re-apply the existing tick labels with a larger font; x labels are
    # slanted and right-aligned.
    ax.set_xticklabels(
        ax.get_xticklabels(), fontsize=label_size, rotation=45, ha="right"
    )
    ax.set_yticklabels(ax.get_yticklabels(), fontsize=label_size)

    plt.title(title)
    plt.tight_layout()
    plt.show()

# Columns compared in the media heatmap: the sales target, the three channel
# spends, and the total-spend column.
cols_media = [
    "sales",
    "broadcast_spend", "ooh_print_spend", "online_spend",
    "total_spend",
]

# Restrict the EDA frame to those columns and plot their pairwise correlations.
plot_corr_heatmap(df_eda.loc[:, cols_media], title="Correlation heatmap (media variables)")


In [None]:
#04
# Correlation heatmap: time trend
# Work on a copy of the EDA frame and add `t`, a 0-based integer row counter.
# df_eda is sorted by Week in cell 02, so `t` increases with time and its
# correlation with each series indicates how much of that series is linear trend.
df_time = df_eda.copy()
df_time["t"] = np.arange(len(df_time))

cols_time = [
    "t",
    "sales",
    "broadcast_spend",
    "ooh_print_spend",
    "online_spend",
]

# plot_corr_heatmap is defined once, in cell 03, and is intentionally not
# redefined in this cell: a second identical `def` placed after the call could
# only run when the name was already bound, and would silently shadow the
# cell-03 definition if the two copies ever diverged.
plot_corr_heatmap(
    df_time[cols_time],
    title="Time-index correlation (trend effect check)"
)




In [None]:
#05
# Weekly spend (stacked bars) + sales (line)
fig, ax1 = plt.subplots(figsize=(12, 4))

x = df_eda["Week"]

# Left axis: media spend, stacked in the order online -> broadcast -> OOH/print.
online = df_eda["online_spend"].values
broadcast = df_eda["broadcast_spend"].values
ooh = df_eda["ooh_print_spend"].values

stack_base = None  # running top of the stack; None until the first layer is drawn
for layer_label, layer_values, layer_color in (
    ("Online Spend", online, "#8453F6"),
    ("Broadcast Spend", broadcast, "#4DA9CD"),
    ("OOH/Print Spend", ooh, "#F1CD46"),
):
    # The first layer is drawn without `bottom`; later layers sit on the
    # cumulative sum of the layers beneath them.
    baseline = {} if stack_base is None else {"bottom": stack_base}
    ax1.bar(
        x, layer_values,
        label=layer_label,
        width=6,
        color=layer_color,
        alpha=0.3,
        **baseline,
    )
    stack_base = layer_values if stack_base is None else stack_base + layer_values

ax1.set(xlabel="Week", ylabel="Media Spend")
ax1.grid(False)

# Right axis: sales as a line sharing the same x axis.
ax2 = ax1.twinx()
ax2.plot(x, df_eda["sales"].values, color="#DB544E", linewidth=2, label="Sales")
ax2.set_ylabel("Sales")

# Single legend that merges the entries of both axes (bars first, then the line).
legend_handles, legend_labels = [], []
for axis in (ax1, ax2):
    axis_handles, axis_labels = axis.get_legend_handles_labels()
    legend_handles.extend(axis_handles)
    legend_labels.extend(axis_labels)
ax1.legend(legend_handles, legend_labels, loc="upper left")

plt.title("Weekly: Media Spend (stacked bars) and Sales (line)")
plt.tight_layout()
plt.show()


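The strong correlation between the time index (`t`) and sales indicates that a time trend is a major driver of sales, which justifies including a trend term in the MMM (marketing mix model). A correlation with a linear index captures trend only, so seasonality has to be judged from the weekly sales plot above, and seasonal terms should be included on that basis.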