# Realistic Transaction Data Generator
This notebook generates synthetic transaction data with 31 fields per row, several of which are driven by the configurable `params` dictionary.
- Periodically appends to a CSV file
- Designed for future feature engineering and ML model training

In [None]:
import pandas as pd
import numpy as np
import random
import string
import time
from datetime import datetime
import os

# CSV file that every generated batch is written or appended to.
output_file = 'transactions.csv'

# Settings for the generator; looked up by key in generate_transaction()
# and in the generation loop at the bottom of the file.
params = dict(
    # Rows produced per batch.
    num_rows=100,
    # Seconds slept between batches.
    interval_seconds=10,
    # Starting fraud probability before the per-transaction bumps.
    fraud_rate=0.05,
    # Merchants that get an extra fraud-probability bump.
    # NOTE(review): 202 and 303 are outside merchant_ids (100-109), so only
    # 101 can ever match -- confirm whether that is intended.
    high_risk_merchants=[101, 202, 303],
    merchant_ids=[*range(100, 110)],
    # Pools that categorical fields are sampled from.
    customer_segments=['standard', 'premium', 'vip'],
    channels=['online', 'instore'],
    currencies=['USD', 'EUR', 'GBP'],
    countries=['US', 'UK', 'DE', 'FR'],
    device_types=['mobile', 'desktop', 'tablet'],
    card_types=['debit', 'credit', 'prepaid'],
    merchant_categories=[
        'grocery', 'electronics', 'fashion', 'travel', 'gaming',
    ],
)

def random_string(length=8):
    """Return a string of ``length`` characters sampled from A-Z and 0-9.

    Characters are drawn with replacement using the ``random`` module, so
    the result is reproducible under ``random.seed`` but not suitable as a
    secret token.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

def generate_transaction(i):
    """Build one synthetic transaction record.

    Args:
        i: Index of the row within its batch; only used as the suffix of
            ``transaction_id`` (the prefix is the current Unix second).

    Returns:
        A dict with 31 fields. Categorical fields backed by the module-level
        ``params`` are sampled uniformly from their pools, ``amount`` comes
        from an exponential distribution with scale 200, and ``fraud`` is a
        0/1 flag drawn with a probability that starts at
        ``params['fraud_rate']`` and is bumped for high-risk merchants,
        amounts above 1000 and the online channel (capped at 0.99).
    """
    pick = random.choice
    cfg = params

    # The sequence of random draws is kept fixed so that seeded runs
    # reproduce the same rows.
    mid = pick(cfg['merchant_ids'])
    spend = np.random.exponential(scale=200)
    sales_channel = pick(cfg['channels'])
    nation = pick(cfg['countries'])
    ccy = pick(cfg['currencies'])
    device_kind = pick(cfg['device_types'])
    card_kind = pick(cfg['card_types'])
    tier = pick(cfg['customer_segments'])
    category = pick(cfg['merchant_categories'])
    cust = f"C{random.randint(10000, 99999)}"

    # Accumulate the fraud probability: base rate plus one bump per
    # risk factor that applies to this transaction.
    risk = cfg['fraud_rate']
    for triggered, bump in (
        (mid in cfg['high_risk_merchants'], 0.05),
        (spend > 1000, 0.02),
        (sales_channel == 'online', 0.01),
    ):
        if triggered:
            risk += bump

    # Keyword arguments are evaluated left to right and dicts keep insertion
    # order, so both the draw order and the column order are stable.
    return dict(
        transaction_id=f"T{int(time.time())}_{i}",
        timestamp=datetime.now().isoformat(),
        customer_id=cust,
        merchant_id=mid,
        merchant_category=category,
        amount=round(spend, 2),
        currency=ccy,
        channel=sales_channel,
        country=nation,
        device_type=device_kind,
        card_type=card_kind,
        customer_segment=tier,
        fraud=int(random.random() < min(risk, 0.99)),
        session_id=random_string(12),
        browser=pick(['Chrome', 'Safari', 'Firefox', 'Edge']),
        ip_address=f"192.168.{random.randint(0,255)}.{random.randint(0,255)}",
        latitude=round(random.uniform(-90, 90), 6),
        longitude=round(random.uniform(-180, 180), 6),
        email_domain=pick(['gmail.com', 'yahoo.com', 'outlook.com']),
        product_id=f"P{random.randint(1000,9999)}",
        auth_method=pick(['password', 'otp', 'biometric']),
        attempts=random.randint(1, 5),
        login_duration=round(random.uniform(0.5, 15.0), 2),
        shipping_speed=pick(['standard', 'express', 'same-day']),
        # Cart value is the amount plus normally distributed noise
        # (mean 20, standard deviation 5).
        cart_value=round(spend + np.random.normal(20, 5), 2),
        items_in_cart=random.randint(1, 10),
        referrer=pick(['google', 'email', 'ad', 'direct']),
        loyalty_points=random.randint(0, 5000),
        coupon_used=pick([0, 1]),
        mobile_app=pick([0, 1]),
        promo_code=pick(['', 'SAVE10', 'FREESHIP', 'WELCOME']),
    )

# Data generation loop
# Runs until interrupted: each pass builds one batch of params['num_rows']
# transactions, appends it to output_file, then sleeps for
# params['interval_seconds'] seconds.
while True:
    batch = [generate_transaction(i) for i in range(params['num_rows'])]
    df = pd.DataFrame(batch)
    # Write the header when the file is missing *or* empty; checking only
    # for existence would leave a pre-created empty file without column
    # names forever.
    needs_header = (
        not os.path.exists(output_file)
        or os.path.getsize(output_file) == 0
    )
    # Append mode creates the file if it does not exist yet, so one call
    # covers both the first write and every later append.
    df.to_csv(output_file, mode='a', header=needs_header, index=False)
    print(f"Appended {params['num_rows']} rows to {output_file}")
    time.sleep(params['interval_seconds'])
