# Generate Synthetic Data
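Writes three synthetic CSV files (`customers.csv`, `accounts.csv`, and `transactions.csv`) to `../data`, creating the directory if it does not exist. Random draws use a fixed seed (42); transaction dates are generated relative to the time the notebook is run.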


In [0]:
import numpy as np, pandas as pd
from pathlib import Path
# --- Shared setup -----------------------------------------------------------
# One seeded generator feeds every table in this cell, so the order of the
# rng.* calls determines the generated values; keep that order when editing.
rng = np.random.default_rng(42)
base = Path('../data')  # output directory, relative to the working directory
base.mkdir(parents=True, exist_ok=True)
N = 3000  # number of customers

# --- Customers --------------------------------------------------------------
customers = pd.DataFrame({'customer_id': range(1, N+1)})
# rng.integers excludes the upper bound, so tenure falls in 1..119.
customers['tenure_months'] = rng.integers(1, 120, N)
customers['segment'] = rng.choice(['Mass','Affluent','HNW'], N, p=[0.7,0.25,0.05])
customers['region'] = rng.choice(['NE','SE','MW','SW','W'], N)  # no weights given: uniform

# Per-customer churn probability: 0.10 baseline, +0.18 when tenure is under
# 12 months, +0.05 for the 'Mass' segment.
base_churn = (
    customers['tenure_months'].lt(12).mul(0.18).add(0.10)
    .add(customers['segment'].eq('Mass').mul(0.05))
)
# One Bernoulli draw per customer; the clip bounds probabilities to [0, 0.8].
customers['churn_90d'] = rng.binomial(1, base_churn.clip(lower=0, upper=0.8))
customers.to_csv(base.joinpath('customers.csv'), index=False)

# --- Accounts ---------------------------------------------------------------
A = 4200  # number of accounts
accounts = pd.DataFrame({'account_id': range(1, A+1)}).assign(
    # Owners are drawn independently per account from 1..N, so a customer can
    # end up with several accounts or with none.
    customer_id=rng.integers(1, N+1, A),
    product_type=rng.choice(['Checking','Savings','Credit'], A, p=[0.5,0.3,0.2]),
    # Normal(mean=3500, sd=1800) rounded to two decimals; the draw is
    # unbounded, so negative balances can occur.
    balance=rng.normal(3500, 1800, A).round(2),
)
accounts.to_csv(base.joinpath('accounts.csv'), index=False)

# --- Transactions -----------------------------------------------------------
T = 25000  # number of transactions
transactions = pd.DataFrame({'txn_id': range(1, T+1)})
transactions['account_id'] = rng.integers(1, A+1, T)
# Each transaction is dated 0..179 days before the time this cell runs.
# NOTE(review): pd.to_datetime('today') appears to keep the time of day, so
# txn_date would differ between runs despite the fixed seed -- confirm whether
# a date-only (normalized) anchor is wanted.
transactions['txn_date'] = pd.to_datetime('today') - pd.to_timedelta(rng.integers(0, 180, T), unit='D')
# Signed amounts centred on zero, rounded to two decimals.
transactions['amount'] = rng.normal(0, 120, T).round(2)
transactions['fee_flag'] = rng.choice([0,1], T, p=[0.9,0.1])  # flag set with probability 0.1
transactions.to_csv(base.joinpath('transactions.csv'), index=False)
print('Synthetic data written to', base)
