In [7]:
# ==========================================
# customer_data random_create
# ==========================================
# - 分析用のデータをランダム作成
# ------------------------------------------
# Author : kokoyugura
# Date   : 2025-11
# Env    : Python 3.10+, openpyxl, pandas, numpy
# ==========================================

In [8]:
# === ライブラリ ===
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
from pathlib import Path

In [9]:
# === Settings ===
# Seed for the random number generators.
#   None -> a different dataset is produced on every run (default).
#   int  -> e.g. 42; makes the generated data reproducible across runs.
SEED = None
if SEED is not None:
    # Seed both generators so any numpy- or random-based sampling is fixed.
    np.random.seed(SEED)
    random.seed(SEED)

num_customers = 2000       # number of distinct customer IDs
num_transactions = 50000   # number of transaction rows to generate
start_date = datetime(2025, 1, 1)    # earliest possible purchase date
end_date = datetime(2025, 10, 31)    # latest possible purchase date

In [10]:
# === Build customer IDs ===
# IDs are "C" followed by a zero-padded number: C0001, C0002, ...
customers = [f"C{n:04d}" for n in range(1, num_customers + 1)]

# === Generate transaction records ===
# Whole days between the first and last allowed purchase date; the random
# day offset below is drawn from 0..span_days with both ends included.
span_days = (end_date - start_date).days

# One [customer, date, amount] record per transaction. The three random draws
# happen in that order for every record.
data = [
    [
        random.choice(customers),
        start_date + timedelta(days=random.randint(0, span_days)),
        round(random.uniform(1000, 30000), -2),  # 1000-30000 yen, rounded to the nearest 100
    ]
    for _ in range(num_transactions)
]

# === Assemble the DataFrame, ordered by customer and then purchase date ===
df = (
    pd.DataFrame(data, columns=["CustomerID", "Date", "Amount"])
    .sort_values(["CustomerID", "Date"])
    .reset_index(drop=True)
)

# === Write the CSV into ./data (relative to the current working directory) ===
ROOT = Path.cwd()
OUTPUT_DIR = ROOT / "data"
OUTPUT_DIR.mkdir(exist_ok=True)  # no error if the folder already exists
out_path = OUTPUT_DIR / "customer_data.csv"
# "utf-8-sig" prepends a BOM to the file; the index column is not written.
df.to_csv(out_path, index=False, encoding="utf-8-sig")

# Report completion and preview the first rows.
print("✅ サンプル顧客データを作成しました： customer_data.csv")
print(df.head())

✅ サンプル顧客データを作成しました： customer_data.csv
  CustomerID       Date   Amount
0      C0001 2025-01-03  29200.0
1      C0001 2025-01-03   1800.0
2      C0001 2025-01-13   7900.0
3      C0001 2025-01-19   2500.0
4      C0001 2025-01-26  28200.0
