# In [2]:
import pandas as pd
import random
import string
import numpy as np
from datetime import datetime, timedelta
import os

# Setup
# Sizing constants for the generated tables and the output location.
# Number of user rows written to t01.csv; also the upper bound of the random
# user ids referenced by the cards and scheduled-payments tables.
NUM_USERS = 500
# Number of card rows written to t02.csv; also the upper bound of the random
# card ids referenced by the transactions and scheduled-payments tables.
NUM_CARDS = 1000
# Number of transaction rows written to t03.csv.
NUM_TRANSACTIONS = 2000
# Number of scheduled-payment rows written to t07.csv.
NUM_SCHEDULED = 500
# Written to every card row (column "02-07") and compared against the card
# balance to compute column "02-04".
LIMIT_AMOUNT = 150_000_000
# Directory that receives all generated CSV files.
OUTPUT_DIR = "files"
# exist_ok=True: re-running the script does not fail if the directory exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Helper functions
def random_name():
    """Return a random "<first> <last>" name.

    The first name is drawn from the module-level ``first_names`` list and
    the last name from ``last_names``, in that order.
    """
    given = random.choice(first_names)
    family = random.choice(last_names)
    return " ".join((given, family))

def random_phone():
    """Return a 12-character phone string: the prefix "998" plus 9 random digits."""
    subscriber_digits = random.choices(string.digits, k=9)
    return "998" + "".join(subscriber_digits)

def random_email(name):
    """Build an e-mail address for ``name`` on a randomly chosen domain.

    The local part is ``name`` lower-cased with every space replaced by a
    dot; the domain is picked at random from a fixed list of four.
    """
    local_part = ".".join(name.lower().split(" "))
    host = random.choice(["example.com", "mail.com", "test.org", "demo.net"])
    return local_part + "@" + host

def random_card_number():
    """Return a string of 16 random decimal digits (leading zeros possible)."""
    digits = random.choices(string.digits, k=16)
    return "".join(digits)

def random_card_type():
    """Return one of "debit", "credit" or "savings", chosen at random."""
    card_types = ("debit", "credit", "savings")
    return random.choice(card_types)

def random_transaction_status():
    """Return one of "pending", "success" or "failed", chosen at random."""
    statuses = ("pending", "success", "failed")
    return random.choice(statuses)

def random_transaction_type():
    """Return one of "transfer", "withdrawal" or "deposit", chosen at random."""
    kinds = ("transfer", "withdrawal", "deposit")
    return random.choice(kinds)

def random_timestamp(start_days_ago=60):
    """Return a random past timestamp formatted as "YYYY-MM-DD HH:MM:SS".

    The moment is drawn uniformly, at one-second resolution, from the window
    ``[now - start_days_ago days, now]``.

    The previous implementation added a random day offset of up to
    ``start_days_ago`` *plus* up to 23 random hours to the window start, so
    the result could land almost a day in the future. It also copied the
    minutes and seconds from the current clock. Drawing one offset in
    seconds keeps every value inside the window and randomizes all fields.

    Args:
        start_days_ago: Size of the look-back window in whole days
            (non-negative integer).

    Returns:
        The timestamp as a string in ``%Y-%m-%d %H:%M:%S`` format.

    Raises:
        ValueError: If ``start_days_ago`` is negative (raised by
            ``random.randint`` for an empty range).
    """
    window_seconds = start_days_ago * 24 * 60 * 60
    seconds_back = random.randint(0, window_seconds)
    moment = datetime.now() - timedelta(seconds=seconds_back)
    return moment.strftime("%Y-%m-%d %H:%M:%S")

# Name data
# Pools sampled by random_name(): one entry from each list is combined into
# "<first> <last>", so at most 10 x 10 = 100 distinct full names can occur.
# They are defined after random_name() but before its first call, which is
# all that is required for the lookup to succeed.
first_names = ["John", "Jane", "Alex", "Maria", "Ahmed", "Wei", "Olga", "Carlos", "Fatima", "Sanjay"]
last_names = ["Smith", "Doe", "Johnson", "Lee", "Patel", "Garcia", "Kim", "Khan", "Ivanov", "Chen"]

# USERS (t01.csv)
# One row per user. The name is generated first so that the e-mail address
# in column "01-03" is derived from the same name as column "01-01".
users = [
    {
        "01-01": full_name,
        "01-02": random_phone(),
        "01-03": random_email(full_name),
        "01-04": random_timestamp(),
        "01-05": random_timestamp(),
        "01-06": random.choice(["active", "blocked", "vip"]),
        "01-07": random.choice([True, False]),
        "01-08": random.randint(0, 500_000_000),
    }
    # The inner generator is lazy, so each name is drawn immediately before
    # the rest of its row, exactly as in a plain loop.
    for full_name in (random_name() for _ in range(NUM_USERS))
]
df_users = pd.DataFrame(users)
df_users.to_csv(f"{OUTPUT_DIR}/t01.csv", index=False)

# CARDS (t02.csv)
# One row per card. Each card points at a random user id in 1..NUM_USERS and
# carries a flag telling whether its balance exceeds LIMIT_AMOUNT.
cards = []
for _ in range(NUM_CARDS):
    owner_id = random.randint(1, NUM_USERS)
    card_balance = random.randint(0, 300_000_000)
    over_limit = card_balance > LIMIT_AMOUNT
    card = {
        "02-01": owner_id,
        "02-02": random_card_number(),
        "02-03": card_balance,
        "02-04": over_limit,
        "02-05": random_timestamp(),
        "02-06": random_card_type(),
        "02-07": LIMIT_AMOUNT,
    }
    cards.append(card)
df_cards = pd.DataFrame(cards)
df_cards.to_csv(f"{OUTPUT_DIR}/t02.csv", index=False)

# TRANSACTIONS (t03.csv)
# One row per transaction between two distinct random card ids in
# 1..NUM_CARDS. Column "03-07" is True when the amount exceeds 5,000,000.
transactions = []
for _ in range(NUM_TRANSACTIONS):
    source_card = random.randint(1, NUM_CARDS)
    # Redraw the destination until it differs from the source card.
    while True:
        target_card = random.randint(1, NUM_CARDS)
        if target_card != source_card:
            break
    txn_amount = random.randint(1000, 10_000_000)
    is_large = txn_amount > 5_000_000
    txn = {
        "03-01": source_card,
        "03-02": target_card,
        "03-03": txn_amount,
        "03-04": random_transaction_status(),
        "03-05": random_timestamp(),
        "03-06": random_transaction_type(),
        "03-07": is_large,
    }
    transactions.append(txn)
df_txns = pd.DataFrame(transactions)
df_txns.to_csv(f"{OUTPUT_DIR}/t03.csv", index=False)

# LOGS (t04.csv)
# One log row for every transaction whose status is "failed" or "pending".
#   "04-01": 1-based position of the transaction in df_txns
#   "04-02": human-readable status message
#   "04-03": the transaction's timestamp (column "03-05")
# The rows are numbered with enumerate() instead of DataFrame.iterrows(), so
# the id is positional rather than dependent on df_txns' index labels, and no
# per-row Series has to be built.
logs = [
    {
        "04-01": txn_number,
        "04-02": f"Transaction status is {status}",
        "04-03": created_at,
    }
    for txn_number, (status, created_at) in enumerate(
        zip(df_txns["03-04"], df_txns["03-05"]), start=1
    )
    if status in ("failed", "pending")
]
# Explicit columns keep the CSV header intact even when no transaction
# matched (an empty list would otherwise yield a frame with no columns).
df_logs = pd.DataFrame(logs, columns=["04-01", "04-02", "04-03"])
df_logs.to_csv(f"{OUTPUT_DIR}/t04.csv", index=False)

# REPORTS (t05.csv)
# Daily roll-up of the transactions table: per calendar day, the number of
# transactions, the number with column "03-07" set, and the summed amount.
# NOTE: this adds a 'report_date' helper column to df_txns in place.
df_txns['report_date'] = pd.to_datetime(df_txns['03-05']).dt.date
daily_totals = (
    df_txns
    .groupby('report_date')
    .agg(
        total_transactions=('03-01', 'count'),
        flagged_transactions=('03-07', 'sum'),
        total_amount=('03-03', 'sum'),
    )
    .reset_index()
)

# Map the aggregate columns onto the numbered report columns.
df_reports = daily_totals.rename(columns={
    'report_date': '05-02',
    'total_transactions': '05-03',
    'flagged_transactions': '05-04',
    'total_amount': '05-05',
})
df_reports['05-02'] = df_reports['05-02'].astype(str)
df_reports.insert(0, '05-01', 'daily')
df_reports = df_reports[['05-01', '05-02', '05-03', '05-04', '05-05']]
df_reports.to_csv(f"{OUTPUT_DIR}/t05.csv", index=False)

# SCHEDULED PAYMENTS (t07.csv)
# One row per scheduled payment: a random user id, a random card id, an
# amount, a date 1-60 days after the current time, a status and a random
# past timestamp.
sched = [
    {
        "07-01": random.randint(1, NUM_USERS),
        "07-02": random.randint(1, NUM_CARDS),
        "07-03": random.randint(1000, 5000000),
        "07-04": (
            datetime.now() + timedelta(days=random.randint(1, 60))
        ).strftime("%Y-%m-%d %H:%M:%S"),
        "07-05": random.choice(["pending", "completed", "failed"]),
        "07-06": random_timestamp(),
    }
    for _ in range(NUM_SCHEDULED)
]
df_sched = pd.DataFrame(sched)
df_sched.to_csv(f"{OUTPUT_DIR}/t07.csv", index=False)

# Bare trailing expression: evaluates to the output directory name
# (presumably so a notebook displays it as the cell result).
OUTPUT_DIR


# Output: 'files'