In [2]:
# ==========================================
# churn_data random_create
# ==========================================
# - 離反顧客分析用のデータをランダム作成
# ------------------------------------------
# Author : kokoyugura
# Date   : 2025-11
# Env    : Python 3.10+, openpyxl, pandas
# ==========================================

In [7]:
# === ライブラリ ===
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from pathlib import Path

In [13]:
# === パラメータ設定 ===
num_customers = 20000
start_date = datetime(2025, 1, 1)
end_date = datetime(2025, 10, 31)

#ランダム値を固定する場合は、下記を有効に。
# np.random.seed(42)
# random.seed(42)


# === 顧客属性 ===
customers = [f"C{str(i).zfill(4)}" for i in range(1, num_customers + 1)]
ages = np.random.randint(20, 70, num_customers)
genders = np.random.choice(["男性", "女性"], num_customers, p=[0.45, 0.55])
customer_df = pd.DataFrame({"CustomerID": customers, "Age": ages, "Gender": genders})

In [14]:
# === トランザクション作成 ===
transactions = []
for cust in customers:
    n = np.random.poisson(5) + 1  # 平均5回購入（1〜10回くらい）
    for _ in range(n):
        date = start_date + timedelta(days=np.random.randint(0, (end_date - start_date).days))
        amount = round(np.random.uniform(1000, 20000), -2)
        transactions.append([cust, date, amount])

trans_df = pd.DataFrame(transactions, columns=["CustomerID", "Date", "Amount"])
trans_df = trans_df.sort_values(["CustomerID", "Date"]).reset_index(drop=True)

# === 保存 ===
ROOT = Path.cwd()
DATA_PATH = ROOT / "data"
DATA_PATH.mkdir(exist_ok=True)
out_path = DATA_PATH / "transactions.csv"
trans_df.to_csv(out_path, index=False, encoding="utf-8-sig")

print("✅ トランザクションデータ作成完了： transactions.csv")
print(trans_df.head())


✅ トランザクションデータ作成完了： transactions.csv
  CustomerID       Date   Amount
0      C0001 2025-04-03   9500.0
1      C0001 2025-05-05   2700.0
2      C0001 2025-06-08   7200.0
3      C0001 2025-08-31  13400.0
4      C0001 2025-10-08   6000.0
