In [None]:
# === Notebook bootstrap: make repo root importable ===
import sys
from pathlib import Path

# Search the working directory and then each of its ancestors for the first
# one containing "src"; if found and not yet on sys.path, put it at the front.
_cwd = Path.cwd().resolve()
_repo_root = next(
    (d for d in (_cwd, *_cwd.parents) if (d / "src").exists()),
    None,
)
if _repo_root is not None and str(_repo_root) not in sys.path:
    sys.path.insert(0, str(_repo_root))

# Echo the resolved locations so a wrong working directory is easy to spot.
print("cwd:", _cwd)
print("sys.path[0]:", sys.path[0])


In [None]:
from pathlib import Path
import pandas as pd

from src.io_utils import project_root, outputs_dir, save_pickle

# Input CSV sits in the "data" folder under whatever project_root() returns;
# OUT_DIR is whatever outputs_dir() returns (both helpers come from src.io_utils).
_data_dir = project_root() / "data"
DATA_PATH = _data_dir / "bike_sales_data.csv"
OUT_DIR = outputs_dir()

In [None]:
# Load the CSV and derive the total / grouped spend columns
df = pd.read_csv(DATA_PATH)
print("raw df:", df.shape)

# Per-channel spend columns that must be present in the CSV
spend_cols = [
    "branded_search_spend",
    "nonbranded_search_spend",
    "facebook_spend",
    "print_spend",
    "ooh_spend",
    "tv_spend",
    "radio_spend",
]

# Normalize spend columns to float: anything pd.to_numeric cannot parse becomes NaN
spend_numeric = df[spend_cols].apply(pd.to_numeric, errors="coerce")

# A value that was present in the CSV but failed to parse (for example a
# currency-formatted string) would otherwise be turned into 0.0 without any
# signal. Report the per-column count so such rows can be inspected.
n_unparseable = (spend_numeric.isna() & df[spend_cols].notna()).sum()
if n_unparseable.any():
    print("WARNING: unparseable spend values replaced with 0.0 (count per column):")
    print(n_unparseable[n_unparseable > 0].to_string())

# Missing and unparseable entries are both treated as zero spend
df[spend_cols] = spend_numeric.fillna(0.0).astype(float)

# total + group spends
broadcast_cols = ["tv_spend", "radio_spend"]
ooh_print_cols = ["ooh_spend", "print_spend"]
online_cols = ["branded_search_spend", "nonbranded_search_spend", "facebook_spend"]

# Row-wise sums: one total across every channel plus one per channel group
df["total_spend"] = df[spend_cols].sum(axis=1)
df["broadcast_spend"] = df[broadcast_cols].sum(axis=1)
df["ooh_print_spend"] = df[ooh_print_cols].sum(axis=1)
df["online_spend"] = df[online_cols].sum(axis=1)

# Quick visual check of the derived columns
display(df[["Week", "sales", "total_spend", "broadcast_spend", "ooh_print_spend", "online_spend"]].head(3))


In [None]:
# Build df_w (Week, sales and the aggregated spend columns) and save it as a pickle
weekly_cols = [
    "Week",
    "sales",
    "broadcast_spend",
    "ooh_print_spend",
    "online_spend",
    "total_spend",
]

df_w = (
    df[weekly_cols]
    # Strict parse: any Week value not matching %m/%d/%y raises instead of becoming NaT
    .assign(Week=lambda frame: pd.to_datetime(frame["Week"], format="%m/%d/%y", errors="raise"))
    .sort_values("Week")
    .reset_index(drop=True)
)

# Shape, covered date range, and the first/last rows as a sanity check
print("df_w:", df_w.shape)
print(df_w["Week"].min(), "→", df_w["Week"].max())
display(df_w.head(3))
display(df_w.tail(3))

# Persist via the project's save_pickle helper
df_w_path = Path(OUT_DIR) / "df_w.pkl"
save_pickle(df_w, df_w_path)
print("Saved:", df_w_path)
