In [1]:
import pandas as pd

# Read the two inputs: the cleaned user/coupon/weather table and the mgm table.
df = pd.read_csv("../cleaned_data/user_with_cnt_coupon_weather_cleaned.csv")
mgm = pd.read_csv("../data/mgm.csv")

# Convert experiment_date on both sides to plain date objects so that
# differently formatted date strings cannot cause the join keys to mismatch.
df["experiment_date"] = pd.to_datetime(df["experiment_date"]).dt.date
mgm["experiment_date"] = pd.to_datetime(mgm["experiment_date"]).dt.date

# Left join: keep every row of df and attach mgm_day from mgm.
# validate="many_to_one" makes pandas raise MergeError if mgm holds more than
# one row for the same experiment_date, instead of silently duplicating df rows.
# Rows whose experiment_date has no match in mgm keep NaN in mgm_day; they are
# only counted below, not filled or dropped.
out = df.merge(mgm, on="experiment_date", how="left", validate="many_to_one")

# Quick sanity checks: row counts before/after the merge, number of rows with
# no mgm_day, and the first ten distinct (experiment_date, mgm_day) pairs.
print("原始筆數:", len(df), "合併後筆數:", len(out))
print("mgm_day 缺失筆數:", out["mgm_day"].isna().sum())
print(out[["experiment_date", "mgm_day"]].drop_duplicates().sort_values("experiment_date").head(10))


原始筆數: 19326 合併後筆數: 19326
mgm_day 缺失筆數: 1341
     experiment_date  mgm_day
0         2025-07-28      7.0
154       2025-08-04      0.0
315       2025-08-18      0.0
474       2025-08-25      5.0
634       2025-09-01      2.0
792       2025-09-08      0.0
952       2025-09-15      0.0
1110      2025-09-22      2.0
1268      2025-09-29      7.0
1427      2025-10-06      1.0


In [2]:
# Preview the first five rows of the merged frame (5 is the default row count).
out.head(n=5)

Unnamed: 0,experiment_date,treatment,source,ops_type_merged,city,user_cnt,nonrepeat_cnt,trip_cnt,weekday_nonrepeat_cnt,weekend_nonrepeat_cnt,...,coupon_folk_total,coupon_growth_other_total,coupon_MGM_total,coupon_MKT_total,coupon_register_total,coupon_daily_total,avg_rainy_day,avg_rainy_weekday,avg_rainy_weekend,mgm_day
0,2025-07-28,不發,控制組,14天在其他尖峰預估車資,南投縣,3,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1.0,0.0,0.5,7.0
1,2025-07-28,不發,控制組,14天在其他尖峰預估車資,嘉義市,15,4.0,4.0,1.0,3.0,...,0,0,0,0,0,0,1.0,0.0,1.0,7.0
2,2025-07-28,不發,控制組,14天在其他尖峰預估車資,嘉義縣,13,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1.0,0.0,0.5,7.0
3,2025-07-28,不發,控制組,14天在其他尖峰預估車資,基隆市,50,13.0,10.0,6.0,7.0,...,0,0,4,0,0,0,0.714286,0.0,0.0,7.0
4,2025-07-28,不發,控制組,14天在其他尖峰預估車資,宜蘭縣,12,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0.857143,0.0,0.0,7.0


In [3]:
# Summary statistics for the numeric columns, with the default quartiles
# written out explicitly.
out.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,user_cnt,nonrepeat_cnt,trip_cnt,weekday_nonrepeat_cnt,weekend_nonrepeat_cnt,weekday_trip_cnt,weekend_trip_cnt,nonrepeat_cnt_per_user,trip_cnt_per_user,weekday_nonrepeat_cnt_per_user,...,coupon_folk_total,coupon_growth_other_total,coupon_MGM_total,coupon_MKT_total,coupon_register_total,coupon_daily_total,avg_rainy_day,avg_rainy_weekday,avg_rainy_weekend,mgm_day
count,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,...,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,19326.0,17985.0
mean,81.226896,11.042482,7.804305,7.576633,3.465849,4.848805,2.9555,0.119196,0.083621,0.081213,...,63.0549,0.501604,1.551537,0.797061,1.373797,0.020025,0.206081,0.075422,0.016653,1.830192
std,182.847088,30.991056,21.865559,21.428368,10.049814,14.063118,8.36436,0.186399,0.146743,0.143449,...,136.373231,2.340061,18.173851,4.097606,4.132387,0.159762,0.298299,0.197742,0.103636,2.201325
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,18.0,1.0,1.0,1.0,0.0,0.0,0.0,0.067073,0.038462,0.03687,...,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,68.0,8.0,6.0,5.0,2.0,3.0,2.0,0.166667,0.117647,0.113377,...,50.0,0.0,0.0,0.0,1.0,0.0,0.285714,0.0,0.0,4.0
max,3795.0,573.0,439.0,390.0,227.0,292.0,151.0,4.0,3.666667,3.666667,...,1652.0,82.0,1646.0,101.0,83.0,4.0,1.0,1.0,1.0,7.0


In [4]:
from pathlib import Path

# Write the merged frame next to the other cleaned inputs.
# utf-8-sig prepends a BOM to the file; the index is not written.
out_path = Path("../cleaned_data/user_with_cnt_coupon_weather_mgm_cleaned.csv")
out.to_csv(out_path, encoding="utf-8-sig", index=False)

# Report the absolute location of the file that was just written.
print("Saved:", out_path.resolve())

Saved: D:\minhsiang.chang\Desktop\2026winter_project\cleaned_data\user_with_cnt_coupon_weather_mgm_cleaned.csv
