In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

def generate_transaction_data(num_transactions=1000, seed=None):
    """Generate a synthetic SWIFT-vs-XRP transaction comparison dataset.

    Each row describes one simulated payment together with randomly drawn
    SWIFT settlement attributes, randomly drawn XRP settlement attributes
    and metrics comparing the two. Every value is produced by uniform
    random draws; nothing is read from a real payment network.

    Args:
        num_transactions: Number of rows to generate.
        seed: Optional seed. When given, all draws come from a private
            ``random.Random(seed)`` so every column except the wall-clock
            based timestamps is reproducible. When ``None`` the
            module-level ``random`` generator is used, so a caller that
            seeded it beforehand keeps control of the stream.

    Returns:
        pandas.DataFrame with one row per transaction (an empty frame when
        ``num_transactions`` is 0).
    """
    # The ``random`` module exposes the same functions as a Random instance.
    rng = random if seed is None else random.Random(seed)

    countries = ["US", "GB", "DE", "JP", "CA", "AU", "CN", "IN", "BR", "ZA"]
    # One reference instant for the whole batch, so rows differ only by the
    # random day offset and not by how long the loop has been running.
    now = datetime.now()

    transactions = []
    for i in range(num_transactions):
        transaction_id = f"TXN-{i+1:06d}"
        if i > 0:
            # 10% chance that this row is a retry of an earlier transaction
            # (ids 1..i are exactly the ones generated before this row).
            if rng.random() > 0.1:
                original_transaction_id = transaction_id
            else:
                original_transaction_id = f"TXN-{rng.randint(1, i):06d}"
        else:
            # The first transaction has nothing earlier to retry.
            original_transaction_id = transaction_id

        timestamp = now - timedelta(days=rng.randint(1, 365))
        amount = round(rng.uniform(100, 1000000), 2)
        currency = rng.choice(["USD", "EUR", "GBP", "JPY", "CAD"])
        source_country = rng.choice(countries)
        # Destination is always different from the source.
        destination_country = rng.choice(
            [c for c in countries if c != source_country]
        )
        transaction_purpose = rng.choice(
            ["Trade Settlement", "Remittance", "Investment", "Payment"]
        )
        priority = rng.choice(["Standard", "Urgent"])
        customer_id = f"CUST-{rng.randint(1000, 9999)}"

        # SWIFT data
        swift_start_time = timestamp
        swift_time_days = rng.uniform(1, 7)
        swift_end_time = swift_start_time + timedelta(days=swift_time_days)
        swift_time_seconds = swift_time_days * 24 * 3600
        # 0.1% to 1% of the transaction amount
        swift_fees = round(amount * rng.uniform(0.001, 0.01), 2)
        swift_intermediary_banks = rng.randint(1, 4)
        swift_compliance_time = rng.randint(3600, 86400)  # 1 hour to 1 day
        swift_failure_rate = rng.random() * 0.05  # up to 5% failure rate
        # NOTE(review): the rate is uniform on [0, 0.05), so only ~20% of
        # rows fall below 0.01 and get "N/A" — confirm this is intended.
        swift_failure_reason = (
            "N/A"
            if swift_failure_rate < 0.01
            else rng.choice(["Invalid Account", "Compliance Issue", "Network Error"])
        )
        swift_message_type = rng.choice(["MT103", "MT202"])
        swift_processing_time = rng.randint(60, 3600)
        swift_receiving_bank_processing_time = rng.randint(60, 3600)
        # One processing time (seconds) per intermediary bank, comma-joined.
        swift_correspondent_bank_processing_times = ",".join(
            str(rng.randint(60, 3600)) for _ in range(swift_intermediary_banks)
        )
        # Roughly 5% of rows carry a processing error. The previous condition
        # (`> 0.95` selecting "N/A") was inverted and flagged ~95% of rows.
        swift_processing_errors = (
            "N/A"
            if rng.random() < 0.95
            else rng.choice(["Timeout", "Data Error", "Format Error"])
        )

        # XRP data
        xrp_start_time = timestamp
        xrp_time_seconds = rng.uniform(5, 60)  # 5 to 60 seconds
        xrp_end_time = xrp_start_time + timedelta(seconds=xrp_time_seconds)
        xrp_fees = round(rng.uniform(0.01, 1), 6)  # minimal fees
        xrp_ledger_index = rng.randint(100000, 1000000)
        xrp_destination_tag = (
            rng.randint(10000, 99999) if rng.random() > 0.5 else None
        )
        xrp_transaction_hash = f"XRP-TXN-{rng.randint(1000000, 9999999)}"
        xrp_failure_rate = rng.random() * 0.01  # up to 1% failure rate
        # NOTE(review): only ~10% of rows fall below 0.001 and get "N/A" —
        # confirm this is intended.
        xrp_failure_reason = (
            "N/A"
            if xrp_failure_rate < 0.001
            else rng.choice(
                ["Insufficient Funds", "Invalid Destination Tag", "Network Issue"]
            )
        )
        xrp_liquidity = round(rng.uniform(100000, 10000000), 2)
        xrp_path_used = rng.choice(["Direct", "Intermediary"])
        xrp_validation_time = rng.uniform(1, 5)
        xrp_offer_used = (
            f"OFFER-{rng.randint(100, 999)}" if rng.random() > 0.7 else None
        )
        # ~10% of rows have no memo.
        xrp_memos = (
            "N/A" if rng.random() > 0.9 else f"Memo-{rng.randint(1000, 9999)}"
        )
        xrp_transaction_type = rng.choice(["Payment", "OfferCreate", "OfferCancel"])
        xrp_account_sequence = rng.randint(1, 100000)

        # Comparison metrics (positive differences mean XRP was faster/cheaper)
        time_difference = swift_time_seconds - xrp_time_seconds
        fee_difference = swift_fees - xrp_fees
        time_improvement_percentage = (
            (time_difference / swift_time_seconds) * 100
            if swift_time_seconds != 0
            else 0
        )
        fee_improvement_percentage = (
            (fee_difference / swift_fees) * 100 if swift_fees != 0 else 0
        )
        xrp_outperforms_swift = time_difference > 0 and fee_difference > 0

        transactions.append({
            "transaction_id": transaction_id,
            "original_transaction_id": original_transaction_id,
            "timestamp": timestamp,
            "amount": amount,
            "currency": currency,
            "source_country": source_country,
            "destination_country": destination_country,
            "transaction_purpose": transaction_purpose,
            "priority": priority,
            "customer_id": customer_id,
            "swift_start_time": swift_start_time,
            "swift_end_time": swift_end_time,
            "swift_time": swift_time_seconds,
            "swift_fees": swift_fees,
            "swift_intermediary_banks": swift_intermediary_banks,
            "swift_compliance_time": swift_compliance_time,
            "swift_failure_rate": swift_failure_rate,
            "swift_failure_reason": swift_failure_reason,
            "swift_message_type": swift_message_type,
            "swift_processing_time": swift_processing_time,
            "swift_receiving_bank_processing_time": swift_receiving_bank_processing_time,
            "swift_correspondent_bank_processing_times": swift_correspondent_bank_processing_times,
            "swift_processing_errors": swift_processing_errors,
            "xrp_start_time": xrp_start_time,
            "xrp_end_time": xrp_end_time,
            "xrp_time": xrp_time_seconds,
            "xrp_fees": xrp_fees,
            "xrp_ledger_index": xrp_ledger_index,
            "xrp_destination_tag": xrp_destination_tag,
            "xrp_transaction_hash": xrp_transaction_hash,
            "xrp_failure_rate": xrp_failure_rate,
            "xrp_failure_reason": xrp_failure_reason,
            "xrp_liquidity": xrp_liquidity,
            "xrp_path_used": xrp_path_used,
            "xrp_validation_time": xrp_validation_time,
            "xrp_offer_used": xrp_offer_used,
            "xrp_memos": xrp_memos,
            "xrp_transaction_type": xrp_transaction_type,
            "xrp_account_sequence": xrp_account_sequence,
            "time_difference": time_difference,
            "fee_difference": fee_difference,
            "time_improvement_percentage": time_improvement_percentage,
            "fee_improvement_percentage": fee_improvement_percentage,
            "xrp_outperforms_swift": xrp_outperforms_swift,
        })

    return pd.DataFrame(transactions)

# Build the default-sized comparison dataset, then persist it as a Parquet
# file in the current working directory.
transaction_df = generate_transaction_data()
transaction_df.to_parquet(path="transactions_comparison.parquet")

print("Data generated and saved to transactions_comparison.parquet")

Data generated and saved to transactions_comparison.parquet


In [5]:
transaction_df.head()

Unnamed: 0,transaction_id,original_transaction_id,timestamp,amount,currency,source_country,destination_country,transaction_purpose,priority,customer_id,...,xrp_validation_time,xrp_offer_used,xrp_memos,xrp_transaction_type,xrp_account_sequence,time_difference,fee_difference,time_improvement_percentage,fee_improvement_percentage,xrp_outperforms_swift
0,TXN-000001,TXN-000001,2024-04-03 16:43:41.124749,838798.65,CAD,AU,ZA,Payment,Urgent,CUST-7206,...,3.145274,OFFER-280,Memo-5219,Payment,51941,529409.235734,1365.261703,99.997375,99.927664,True
1,TXN-000002,TXN-000001,2024-06-17 16:43:41.124749,468278.89,JPY,GB,ZA,Trade Settlement,Urgent,CUST-6269,...,1.581947,,Memo-9487,OfferCreate,20795,349659.198471,4300.31374,99.994997,99.996831,True
2,TXN-000003,TXN-000001,2024-07-03 16:43:41.124749,459771.27,JPY,BR,DE,Trade Settlement,Urgent,CUST-3842,...,1.976406,,Memo-9993,OfferCreate,69998,300495.347803,959.665827,99.99088,99.921475,True
3,TXN-000004,TXN-000004,2024-04-29 16:43:41.124749,1387.32,EUR,CN,CA,Investment,Urgent,CUST-2597,...,1.164735,,Memo-7317,OfferCreate,4886,121320.115745,3.947224,99.959315,97.703564,True
4,TXN-000005,TXN-000005,2024-09-22 16:43:41.124749,248027.55,USD,GB,BR,Remittance,Urgent,CUST-9891,...,1.41717,OFFER-513,Memo-1813,Payment,26511,309502.610957,2438.30252,99.997546,99.995182,True


In [None]:
import pandas as pd
import random
from faker import Faker
from datetime import datetime, timedelta

fake = Faker()

def generate_dummy_transactions(num_transactions=1000):
    """Generate dummy bank-style XRP transaction records.

    Values come from the module-level ``fake`` Faker instance and the
    ``random`` module. Several columns are populated only for a random
    share of rows and hold ``None`` otherwise (see inline comments).

    Args:
        num_transactions: Number of rows to generate.

    Returns:
        pandas.DataFrame with one row per transaction and the
        human-readable column names used in the dict below.
    """
    # hexify() replaces each '^' with one random hex digit, so the template
    # must contain 40 carets to produce 40 digits. The previous '^40'
    # template yielded a single digit followed by the literal text "40".
    address_template = '^' * 40

    data = []
    for i in range(num_transactions):
        transaction_id = f"TXN{i+100000}"
        sender_xrp = f"r{fake.hexify(text=address_template)}"
        receiver_xrp = f"r{fake.hexify(text=address_template)}"
        amount_xrp = round(random.uniform(0.01, 10000), 8)
        # Simulated fiat amount. NOTE(review): this uses its own random rate,
        # independent of the 'Exchange Rate' column below — confirm intended.
        amount_fiat = round(amount_xrp * random.uniform(0.5, 2.0), 2)
        # 80% of transactions have a fiat currency
        fiat_currency = random.choice(['USD', 'EUR', 'GBP', 'JPY']) if random.random() < 0.8 else None
        timestamp = fake.date_time_between(start_date='-1y', end_date='now').strftime('%Y-%m-%d %H:%M:%S')
        status = random.choice(['Success', 'Pending', 'Failed', 'Rejected'])
        fee_xrp = round(random.uniform(0.0001, 0.01), 8)
        # 60% of transactions have a destination tag
        destination_tag = fake.random_int(min=0, max=4294967295) if random.random() < 0.6 else None
        # Exchange-rate details exist only when a fiat currency was chosen
        exchange_rate = round(random.uniform(0.5, 2.0), 4) if fiat_currency else None
        exchange_rate_timestamp = fake.date_time_between(start_date='-1y', end_date='now').strftime('%Y-%m-%d %H:%M:%S') if fiat_currency else None
        exchange_rate_source = random.choice(['Kraken', 'Binance', 'Internal', 'Bank API']) if fiat_currency else None
        purpose = fake.sentence()
        # 40% of transactions have an originating / destination branch
        originating_bank_branch = fake.city() if random.random() < 0.4 else None
        destination_bank_branch = fake.city() if random.random() < 0.4 else None
        # 20% of transactions have a correspondent bank
        correspondent_bank = fake.company() if random.random() < 0.2 else None
        transaction_type = random.choice(['Payment', 'Trade', 'Escrow', 'Bridge'])
        # 70% of transactions have an AML score
        aml_risk_score = random.randint(0, 100) if random.random() < 0.7 else None
        # 10% of transactions carry a sanctions-match value
        sanctions_match = random.choice(['Yes', 'No']) if random.random() < 0.1 else None
        transaction_source = random.choice(['Web', 'Mobile', 'API', 'Branch'])
        # 90% of transactions have a customer ID
        customer_id = f"USER{random.randint(1000, 2000)}" if random.random() < 0.9 else None
        # 80% of transactions have an internal account ID
        internal_account_id = f"ACC{random.randint(10000, 20000)}" if random.random() < 0.8 else None
        ledger_sequence = random.randint(100000, 500000)
        validation_status = random.choice(['Valid', 'Invalid'])
        # A payment method is recorded only for 'Bridge' transactions
        payment_method = random.choice(['XRP Ledger', 'ILP', 'RippleNet']) if transaction_type == 'Bridge' else None
        # A regulatory flag is assigned only when an AML score exists and exceeds 70
        regulatory_flag = random.choice(['High Risk', 'Medium Risk', 'Low Risk']) if aml_risk_score and aml_risk_score > 70 else None

        data.append({
            'Transaction ID (XRP Ledger)': transaction_id,
            'Sender XRP Address': sender_xrp,
            'Receiver XRP Address': receiver_xrp,
            'Transaction Amount (XRP)': amount_xrp,
            'Transaction Amount (Fiat)': amount_fiat,
            'Transaction Currency (If applicable)': fiat_currency,
            'Transaction Timestamp': timestamp,
            'Transaction Status': status,
            'Transaction Fees (XRP)': fee_xrp,
            'Destination Tag (Memo/Note)': destination_tag,
            'Exchange Rate (XRP to Fiat)': exchange_rate,
            'Exchange Rate Timestamp': exchange_rate_timestamp,
            'Exchange Rate Source': exchange_rate_source,
            'Transaction Purpose/Description': purpose,
            'Originating Bank Branch': originating_bank_branch,
            'Destination Bank Branch': destination_bank_branch,
            'Correspondent Bank Information (If applicable)': correspondent_bank,
            'Transaction Type': transaction_type,
            'AML Risk Score': aml_risk_score,
            'Sanctions Match': sanctions_match,
            'Transaction Source': transaction_source,
            'Customer ID': customer_id,
            'Internal Account ID': internal_account_id,
            'Ledger Sequence': ledger_sequence,
            'Validation Status': validation_status,
            'Payment Method': payment_method,
            'Regulatory Flag': regulatory_flag
        })

    return pd.DataFrame(data)

# Example usage: build the default-sized dummy dataset and write it to
# Parquet without the DataFrame index.
transactions_df = generate_dummy_transactions()
transactions_df.to_parquet(path='dummy_xrp_transactions.parquet', index=False)
print("Dummy transactions generated and stored as dummy_xrp_transactions.parquet")