In [None]:
import pandas as pd
import random
from faker import Faker
from datetime import datetime, timedelta

fake = Faker()

def generate_dummy_customer_data(num_customers=100):
    """Generate dummy customer identity data.

    Args:
        num_customers: Number of customer records to create.

    Returns:
        A pandas DataFrame with one row per customer holding fake identity,
        government-ID and XRP account fields. Dates are stored as
        'YYYY-MM-DD' strings and the whitelist of authorized addresses is
        stored as the string form of a Python list. The frame has no
        columns when ``num_customers`` is 0.
    """
    date_format = '%Y-%m-%d'
    # Faker's hexify() swaps each '^' for one random hex digit, so the
    # template needs 40 carets. The former literal '^40' produced a single
    # hex digit followed by the text "40", i.e. only 16 distinct addresses
    # and constant collisions between customers.
    xrp_template = '^' * 40

    data = []
    for i in range(num_customers):
        user_id = f"USER{i+1000}"
        full_name = fake.name()
        # Customers are between 18 and 65 years old.
        dob = fake.date_between(start_date='-65y', end_date='-18y').strftime(date_format)
        nationality = fake.country()
        # fake.address() is multi-line; flatten it into a single field.
        address = fake.address().replace('\n', ', ')
        phone = fake.phone_number()
        email = fake.email()
        id_type = random.choice(['Passport', "Driver's License", 'National ID'])
        id_number = fake.ssn()
        # Keep the issue date as a date object so the expiry date can be
        # derived without formatting and re-parsing a string.
        issued_on = fake.date_between(start_date='-10y', end_date='-1y')
        # The ID expires 3 to 10 (365-day) years after it was issued.
        expires_on = issued_on + timedelta(days=random.randint(365 * 3, 365 * 10))
        xrp_address = f"r{fake.hexify(text=xrp_template)}"
        account_creation_date = fake.date_between(start_date='-5y', end_date='today').strftime(date_format)
        account_status = random.choice(['Active', 'Inactive', 'Blocked'])
        account_type = random.choice(['Personal', 'Business'])
        preferred_currency = random.choice(['USD', 'EUR', 'JPY', 'GBP'])
        # Zero to three whitelisted counterparties per customer.
        authorized_xrp_addresses = [
            f"r{fake.hexify(text=xrp_template)}"
            for _ in range(random.randint(0, 3))
        ]

        data.append({
            'User ID (Internal Bank System)': user_id,
            'Full Name (Legal Name)': full_name,
            'Date of Birth': dob,
            'Nationality': nationality,
            'Residential Address (Physical and Postal)': address,
            'Contact Information (Phone, Email)': f"{phone}, {email}",
            'Government-Issued Identification:Type of ID': id_type,
            'ID Number': id_number,
            'Issue Date': issued_on.strftime(date_format),
            'Expiry Date': expires_on.strftime(date_format),
            'XRP Address(es)': xrp_address,
            'Account Creation Date': account_creation_date,
            'Account Status': account_status,
            'Account Type': account_type,
            'Preferred Currency': preferred_currency,
            'Authorized XRP Addresses (Whitelisting)': str(authorized_xrp_addresses),
        })

    return pd.DataFrame(data)

def generate_dummy_transaction_data(num_transactions=500, customers_df=None):
    """Generate dummy XRP transaction data.

    Args:
        num_transactions: Number of transaction rows to create.
        customers_df: Customer DataFrame; sender addresses are drawn at
            random from its 'XRP Address(es)' column.

    Returns:
        A pandas DataFrame with one row per transaction. An empty DataFrame
        (no columns) is returned when ``customers_df`` is None or empty,
        or when ``num_transactions`` is 0. The exchange-rate columns are
        None for rows whose currency is None.
    """
    if customers_df is None or customers_df.empty:
        return pd.DataFrame()  # return empty dataframe if no customers.

    timestamp_format = '%Y-%m-%d %H:%M:%S'
    # Faker's hexify() swaps each '^' for one random hex digit, so the
    # template needs 40 carets. The former literal '^40' produced a single
    # hex digit followed by the text "40".
    xrp_template = '^' * 40
    # Loop-invariant: materialize the sender pool once instead of
    # converting the column to a list for every transaction.
    sender_pool = customers_df['XRP Address(es)'].tolist()

    data = []
    for i in range(num_transactions):
        transaction_id = f"TXN{i+10000}"
        sender_xrp = random.choice(sender_pool)
        receiver_xrp = f"r{fake.hexify(text=xrp_template)}"
        amount = round(random.uniform(1, 1000), 6)
        # None marks a transaction with no fiat currency attached.
        currency = random.choice(['USD', 'EUR', None])
        timestamp = fake.date_time_between(start_date='-1y', end_date='now').strftime(timestamp_format)
        status = random.choice(['Success', 'Pending', 'Failed'])
        fees = round(random.uniform(0.0001, 0.01), 6)
        # Roughly half of the transactions carry a destination tag.
        destination_tag = fake.random_int(min=0, max=100000) if random.random() < 0.5 else None
        # Exchange-rate details only exist when a fiat currency is set.
        exchange_rate = round(random.uniform(0.5, 2.0), 4) if currency else None
        exchange_rate_timestamp = (
            fake.date_time_between(start_date='-1y', end_date='now').strftime(timestamp_format)
            if currency else None
        )
        exchange_rate_source = random.choice(['Kraken', 'Binance', 'Internal']) if currency else None
        purpose = fake.sentence()

        data.append({
            'Transaction ID (XRP Ledger)': transaction_id,
            'Sender XRP Address': sender_xrp,
            'Receiver XRP Address': receiver_xrp,
            'Transaction Amount (XRP)': amount,
            'Transaction Currency (If applicable)': currency,
            'Transaction Timestamp': timestamp,
            'Transaction Status': status,
            'Transaction Fees (XRP)': fees,
            'Destination Tag (Memo/Note)': destination_tag,
            'Exchange Rate (XRP to Fiat)': exchange_rate,
            'Exchange Rate Timestamp': exchange_rate_timestamp,
            'Exchange Rate Source': exchange_rate_source,
            'Transaction Purpose/Description': purpose,
        })

    return pd.DataFrame(data)

def generate_dummy_kyc_data(customers_df):
    """Build one dummy KYC/AML record per customer.

    Args:
        customers_df: Customer DataFrame; each row's
            'User ID (Internal Bank System)' value is copied into the
            matching KYC record.

    Returns:
        A pandas DataFrame with one KYC/AML row per customer row, or an
        empty DataFrame (no columns) when ``customers_df`` is None or empty.
    """
    # Guard clause: nothing to profile without customers.
    if customers_df is None or customers_df.empty:
        return pd.DataFrame()

    # Dict values are evaluated top to bottom, so the random generators
    # are consumed in the same order for every customer.
    kyc_records = [
        {
            'User ID (Internal Bank System)': customer['User ID (Internal Bank System)'],
            'Source of Funds/Wealth': fake.sentence(),
            'Occupation/Business Details': fake.job(),
            'Transaction History (Internal Bank Transactions)': fake.sentence(),
            'Risk Scores (AML/KYC)': random.randint(0, 100),
            'Sanctions Screening Results': random.choice(['Clear', 'Alert']),
            'PEP (Politically Exposed Person) Status': random.choice(['Yes', 'No']),
            'KYC/AML Document Scans/Verification Records': fake.file_path(),
        }
        for _, customer in customers_df.iterrows()
    ]

    return pd.DataFrame(kyc_records)

# Example usage: build the customer table first, then derive the
# transaction and KYC tables from it.
customers_df = generate_dummy_customer_data()
transactions_df = generate_dummy_transaction_data(customers_df=customers_df)
kyc_df = generate_dummy_kyc_data(customers_df)

# Persist each DataFrame to its own Parquet file (row index omitted).
for _frame, _parquet_path in (
    (customers_df, 'customers.parquet'),
    (transactions_df, 'transactions.parquet'),
    (kyc_df, 'kyc.parquet'),
):
    _frame.to_parquet(_parquet_path, index=False)

print("Dummy data generated and stored as Parquet files.")