In [None]:
# ==========================================
# churn_data Data Cleansing
# ==========================================
# - 離反顧客分析用のデータを整形
# ------------------------------------------
# Author : kokoyugura
# Date   : 2025-11
# Env    : Python 3.10+, openpyxl, pandas
# ==========================================

In [4]:
# === ライブラリ ===
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path

In [5]:
# === Load data ===
# Paths are resolved against the current working directory, so the notebook
# has to be run from the directory that contains `data/`.
ROOT = Path.cwd()
DATA_PATH = ROOT.joinpath("data", "transactions.csv")
# Ask pandas to parse the "Date" column as datetimes while reading.
df = pd.read_csv(filepath_or_buffer=DATA_PATH, parse_dates=["Date"])

In [6]:
# === Basic Recency / Frequency / Monetary features ===
# Fixed reference date; Recency is measured relative to it.
today = datetime(2025, 11, 1)

# One row per CustomerID with the aggregates named below.
rfm = (
    df.groupby("CustomerID")
    .agg(
        LastPurchase=pd.NamedAgg(column="Date", aggfunc="max"),    # latest purchase date
        Frequency=pd.NamedAgg(column="Date", aggfunc="count"),     # number of non-null dates
        Monetary=pd.NamedAgg(column="Amount", aggfunc="sum"),      # total amount
        AvgAmount=pd.NamedAgg(column="Amount", aggfunc="mean"),    # mean amount per row
    )
    .reset_index()
)

# Whole days elapsed between the last purchase and the reference date.
days_since_last = today - rfm["LastPurchase"]
rfm["Recency"] = days_since_last.dt.days

# === Mean purchase interval per customer (AvgInterval) ===
# Days between consecutive purchases of the same customer. The diff is taken
# on a sorted copy, but the result keeps the original row labels, so assigning
# it back to `df` aligns each gap with its own transaction row. The first
# purchase of every customer has no predecessor and therefore gets NaN.
chronological = df.sort_values(["CustomerID", "Date"])
intervals = chronological.groupby("CustomerID")["Date"].diff().dt.days
df["Interval"] = intervals

# mean() skips NaN, so a customer with a single purchase ends up with NaN here.
avg_interval = (
    df.groupby("CustomerID")["Interval"]
    .mean()
    .rename("AvgInterval")
    .reset_index()
)
rfm = rfm.merge(avg_interval, on="CustomerID", how="left")

# === Purchase count over the last 3 months ===
# "3 months" is approximated as a fixed 90-day window ending at `today`.
RECENT_WINDOW_DAYS = 90
recent_threshold = today - pd.Timedelta(days=RECENT_WINDOW_DAYS)
recent_purchases = (
    df[df["Date"] >= recent_threshold]
    .groupby("CustomerID")["Date"]
    .count()
    .reset_index()
    .rename(columns={"Date": "Last3MonthsPurchases"})
)
# Customers with no purchase inside the window are absent from
# `recent_purchases`, so the left merge gives them NaN and pandas upcasts the
# whole column to float. Fill those gaps with 0 and cast back to an integer
# dtype so the counts are stored as 2 rather than 2.0.
rfm = (
    pd.merge(rfm, recent_purchases, on="CustomerID", how="left")
    .fillna({"Last3MonthsPurchases": 0})
    .astype({"Last3MonthsPurchases": "int64"})
)

# === Churn label: 1 when Recency exceeds 90 days, otherwise 0 ===
# NOTE(review): Last3MonthsPurchases is counted over the same 90-day window
# measured from `today`, so it looks close to a restatement of this label —
# confirm it is excluded from (or handled carefully in) any model features.
is_churned = rfm["Recency"].gt(90)
rfm["ChurnFlag"] = is_churned.astype(int)

# === Missing-value handling ===
# Replace missing AvgInterval values with the median of the observed ones
# (median() ignores NaN by default).
median_interval = rfm["AvgInterval"].median()
rfm["AvgInterval"] = rfm["AvgInterval"].fillna(median_interval)

# === Save ===
# The feature table is written next to the input file, under ROOT/data.
out_path = ROOT.joinpath("data", "features.csv")
# "utf-8-sig" prepends a byte-order mark to the file
# (presumably so spreadsheet tools detect the encoding — confirm).
rfm.to_csv(path_or_buf=out_path, index=False, encoding="utf-8-sig")

# Report completion and show the first rows as a quick sanity check.
print("✅ 顧客特徴量ファイルを作成しました： features.csv")
print(rfm.head())

✅ 顧客特徴量ファイルを作成しました： features.csv
  CustomerID LastPurchase  Frequency  Monetary     AvgAmount  Recency  \
0      C0001   2025-10-08          5   38800.0   7760.000000       24   
1      C0002   2025-05-24          4   28500.0   7125.000000      161   
2      C0003   2025-10-11          6   75100.0  12516.666667       21   
3      C0004   2025-10-19         10  101300.0  10130.000000       13   
4      C0005   2025-10-22          5   60700.0  12140.000000       10   

   AvgInterval  Last3MonthsPurchases  ChurnFlag  
0    47.000000                   2.0          0  
1    46.666667                   0.0          1  
2    55.800000                   2.0          0  
3    30.777778                   3.0          0  
4    39.250000                   2.0          0  
