In [2]:
!pip install faker

Collecting faker
  Downloading faker-38.0.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-38.0.0-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ----- ---------------------------------- 0.3/2.0 MB ? eta -:--:--
   ----- ---------------------------------- 0.3/2.0 MB ? eta -:--:--
   ---------- ----------------------------- 0.5/2.0 MB 699.0 kB/s eta 0:00:03
   --------------- ------------------------ 0.8/2.0 MB 958.5 kB/s eta 0:00:02
   -------------------------- ------------- 1.3/2.0 MB 1.2 MB/s eta 0:00:01
   ------------------------------- -------- 1.6/2.0 MB 1.3 MB/s eta 0:00:01
   ------------------------------- -------- 1.6/2.0 MB 1.3 MB/s eta 0:00:01
   ------------------------------- -------- 1.6/2.0 MB 1.3 MB/s eta 0:00:01
   ------------------------------------- -- 1.8/2.0 MB 996.7 kB/s eta 0:00:01
   ---------------------------------------- 2.0/2.0 MB 93

In [4]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta

# Seed every random source used below (stdlib random, NumPy's global state,
# and Faker) so the random draws are repeatable from a fresh kernel.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)

fake = Faker()

# -------------------------
# 1. Generate Customers
# -------------------------
num_customers = 800

# One record per customer. IDs are 1-based and follow generation order.
# City and state come from separate Faker calls, so they are not tied
# to each other.
customers = [
    {
        "customer_id": customer_number,
        "name": fake.name(),
        "age": random.randint(18, 70),
        "gender": random.choice(["Male", "Female"]),
        "city": fake.city(),
        "state": fake.state(),
        "income": random.randint(2000, 20000),
    }
    for customer_number in range(1, num_customers + 1)
]

customers_df = pd.DataFrame(customers)

# -------------------------
# 2. Generate Merchants
# -------------------------
# Categories a merchant can be assigned to; each merchant gets exactly one,
# picked uniformly at random.
merchant_categories = [
    "Groceries",
    "Restaurants",
    "Electronics",
    "Fashion",
    "Travel",
    "Healthcare",
    "Fuel",
    "Entertainment",
    "Online Services",
]

num_merchants = 200

# Merchant IDs are 1-based and follow generation order.
merchants = [
    {
        "merchant_id": merchant_number,
        "merchant_name": fake.company(),
        "category": random.choice(merchant_categories),
    }
    for merchant_number in range(1, num_merchants + 1)
]

merchants_df = pd.DataFrame(merchants)

# -------------------------
# 3. Generate Cards
# -------------------------
card_types = ["Visa", "Mastercard", "Amex"]
cards = []

# Card IDs form one running 1-based sequence across all customers, derived
# from how many cards have been issued so far.
for cust_id in customers_df["customer_id"]:
    cards_for_customer = random.randint(1, 3)  # each customer has 1-3 cards
    for _ in range(cards_for_customer):
        new_card = {
            "card_id": len(cards) + 1,
            "customer_id": cust_id,
            "card_type": random.choice(card_types),
            "credit_limit": random.randint(2000, 20000),
        }
        cards.append(new_card)

cards_df = pd.DataFrame(cards)

# -------------------------
# 4. Generate Transactions
# -------------------------
num_transactions = 50000
transactions = []

# Build the lookups once. Only the IDs are needed per transaction, so drawing
# from plain lists avoids sampling a DataFrame row and re-filtering cards_df
# on every one of the loop iterations.
customer_ids = customers_df["customer_id"].tolist()
merchant_ids = merchants_df["merchant_id"].tolist()

# customer_id -> list of that customer's card_ids
cards_by_customer = {}
for owner_id, owned_card_id in zip(
    cards_df["customer_id"].tolist(), cards_df["card_id"].tolist()
):
    cards_by_customer.setdefault(owner_id, []).append(owned_card_id)

for i in range(num_transactions):
    # Pick a customer, then one of that customer's own cards, then a merchant.
    customer_id = random.choice(customer_ids)
    card_id = random.choice(cards_by_customer[customer_id])
    merchant_id = random.choice(merchant_ids)

    timestamp = fake.date_time_between(start_date="-6M", end_date="now")

    # Exponential component plus a uniform base, rounded to cents.
    amount = round(np.random.exponential(scale=50) + random.uniform(1, 300), 2)

    # Fraud injection
    is_fraud = 0
    if random.random() < 0.02:  # 2% fraud rate
        is_fraud = 1
        # Unusually large amount. Round again after scaling so fraud rows are
        # stored with two decimals like every other row.
        amount = round(amount * random.uniform(2, 10), 2)
        # NOTE(review): this shift can move a very recent timestamp slightly
        # past "now" -- confirm whether that is acceptable.
        timestamp += timedelta(minutes=random.randint(1, 5))

    transactions.append({
        "transaction_id": i + 1,
        "customer_id": customer_id,
        "merchant_id": merchant_id,
        "card_id": card_id,
        "amount": amount,
        "timestamp": timestamp,
        "transaction_type": random.choice(["Online", "POS"]),
        "is_fraud": is_fraud,
    })

transactions_df = pd.DataFrame(transactions)

# -------------------------
# Save to CSV (local folder)
# -------------------------
# Map each output file name to the table it holds. Files are written to the
# current working directory without the DataFrame index column.
csv_outputs = {
    "customers.csv": customers_df,
    "merchants.csv": merchants_df,
    "cards.csv": cards_df,
    "transactions.csv": transactions_df,
}

for csv_name, table in csv_outputs.items():
    table.to_csv(csv_name, index=False)

# Report the files only after every write has completed.
print("Files successfully saved:")
for csv_name in csv_outputs:
    print(csv_name)

Files successfully saved:
customers.csv
merchants.csv
cards.csv
transactions.csv
