In [4]:
import random
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

# Seed every random source this script draws from so that repeated runs build
# the same dataset. Three independent generators are in play: NumPy's global
# generator, the stdlib `random` module, and Faker, which keeps its own
# generator that neither of the other two seeds affects.
SEED = 42
fake = Faker()
Faker.seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
# NOTE: the Faker date helpers used below take ranges relative to the current
# date ('-5y', '-1y', 'now'), so generated dates still shift with the day the
# script is run even though the underlying random draws are now fixed.

# --- 1. Customers ---
# Builds the `customers` table: one row per customer with a synthetic name,
# age, city and risk score, then injects one missing age and one outlier age.
n_customers = 500
MIN_AGE, MAX_AGE = 18, 100  # plausible bounds for the "clean" ages
customer_ids = [f"CUST{1000 + i}" for i in range(n_customers)]

# Faker calls stay in the original order (all names, then all cities).
names = [fake.name() for _ in range(n_customers)]

# Ages come from N(40, 12) truncated to whole years. The raw draw can land on
# infants or even negative values, so clip it; the only implausible age left
# in the table is then the outlier injected on purpose below.
ages = np.random.normal(40, 12, size=n_customers).astype(int).clip(MIN_AGE, MAX_AGE)

cities = [fake.city() for _ in range(n_customers)]

# About 10% of customers get no risk score. Using NaN (rather than None) keeps
# the column numeric instead of falling back to object dtype.
risk_scores = np.random.choice(
    [np.nan, 0.2, 0.5, 0.9, 1.0],
    size=n_customers,
    p=[0.1, 0.3, 0.3, 0.2, 0.1],
)

customers = pd.DataFrame({
    "customer_id": customer_ids,
    "name": names,
    # Nullable integer dtype: lets one age be missing without silently
    # turning every other age into a float (40 -> 40.0 in the CSV).
    "age": pd.array(ages, dtype="Int64"),
    "city": cities,
    "risk_score": risk_scores,
})

# Add missing values & outliers
customers.loc[5, 'age'] = pd.NA
customers.loc[23, 'age'] = 150  # outlier

# --- 2. Accounts ---
# Builds the `accounts` table: every customer gets one or two accounts, each
# with a unique ID, a type, an opening date and a balance; one balance is then
# overwritten with an anomalous value.
account_types = ['Checking', 'Savings', 'Investment']

# Decide up front how many accounts each customer holds (2 about a third of
# the time) so the total number of IDs needed is known.
accounts_per_customer = [random.choice([1, 1, 2]) for _ in customer_ids]

# Draw all five-digit account numbers in one go *without replacement*.
# Independent randint() calls would very likely hand the same ID to two
# different accounts, which breaks account_id as a key for transactions.
# random.sample raises ValueError if more IDs are requested than exist.
account_numbers = iter(
    random.sample(range(10000, 100000), sum(accounts_per_customer))
)

account_rows = []
for cid, n_accounts in zip(customer_ids, accounts_per_customer):
    for _ in range(n_accounts):
        account_rows.append({
            "account_id": f"ACC{next(account_numbers)}",
            "customer_id": cid,
            "account_type": random.choice(account_types),
            "open_date": fake.date_between(start_date='-5y', end_date='-1y'),
            "balance": round(np.random.normal(5000, 3000), 2),
        })

accounts = pd.DataFrame(account_rows)
# NOTE(review): N(5000, 3000) already produces some negative balances on its
# own, so the value injected here is not the only negative one - confirm
# whether that is intended.
accounts.loc[3, 'balance'] = -2000  # anomaly

# --- 3. Transactions ---
# Builds the `transactions` table: 10-40 transactions per account, each with a
# unique ID, timestamp, amount, direction and fraud flag; one amount is then
# replaced by an outlier and another by a missing value.

# Fix the number of transactions per account first so the total is known.
tx_counts = [random.randint(10, 40) for _ in range(len(accounts))]

# Draw every six-digit transaction number at once *without replacement*.
# With thousands of rows, independent randint() draws from this range are
# practically guaranteed to repeat, leaving transaction_id non-unique.
# random.sample raises ValueError if more IDs are requested than exist.
tx_numbers = iter(random.sample(range(100000, 1000000), sum(tx_counts)))

transaction_rows = []
# Only the account ID is needed, so iterate that column directly instead of
# materialising a full row object per account with iterrows().
for acc_id, n_tx in zip(accounts['account_id'], tx_counts):
    for _ in range(n_tx):
        transaction_rows.append({
            "transaction_id": f"TX{next(tx_numbers)}",
            "account_id": acc_id,
            "timestamp": fake.date_time_between(start_date='-1y', end_date='now'),
            # NOTE(review): N(150, 75) occasionally yields negative amounts
            # alongside the separate debit/credit flag - confirm intended.
            "amount": round(np.random.normal(150, 75), 2),
            "type": random.choice(['debit', 'credit']),
            "is_fraud": np.random.choice([0, 1], p=[0.97, 0.03]),  # ~3% fraud
        })

transactions = pd.DataFrame(transaction_rows)
transactions.loc[10, 'amount'] = 99999   # outlier
transactions.loc[50, 'amount'] = np.nan  # missing

# --- Save to CSV ---
# Writes the three tables as customers.csv, accounts.csv and transactions.csv
# (no index column) into the output directory.
output_dir = Path('C:/portfolio/bank_project/data')
# to_csv does not create missing parent directories, so make sure the target
# exists; exist_ok keeps re-runs from failing once it has been created.
output_dir.mkdir(parents=True, exist_ok=True)

tables = (
    ("customers", customers),
    ("accounts", accounts),
    ("transactions", transactions),
)
for table_name, table in tables:
    table.to_csv(output_dir / f"{table_name}.csv", index=False)

print("✅ Mock data generated and saved.")


✅ Mock data generated and saved.
