In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta

In [2]:
# Seed NumPy's global random state so every np.random.* draw in the
# notebook is reproducible after Restart Kernel -> Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [3]:
# Base attributes of the synthetic transactions.
# Every draw below uses NumPy's global random state, so the statements must
# stay in this order: reordering them changes the generated values.
n_transactions = 30000

# Sequential, 1-based transaction identifiers.
transaction_ids = np.arange(n_transactions) + 1

# User ids drawn uniformly from 1000..5999 (the upper bound is exclusive).
user_ids = np.random.randint(low=1000, high=6000, size=n_transactions)

# Amounts drawn uniformly between 100 and 5000, rounded to two decimals.
amounts = np.random.uniform(100, 5000, size=n_transactions).round(2)

# Payment method -> sampling probability.
payment_method_weights = {
    'Credit Card': 0.3,
    'Debit Card': 0.25,
    'UPI': 0.25,
    'Net Banking': 0.1,
    'Wallet': 0.1,
}
payment_methods = np.random.choice(
    list(payment_method_weights),
    size=n_transactions,
    p=list(payment_method_weights.values()),
)

# Device -> sampling probability.
device_weights = {'Android': 0.5, 'iOS': 0.3, 'Web': 0.2}
devices = np.random.choice(
    list(device_weights),
    size=n_transactions,
    p=list(device_weights.values()),
)

# Dates fall on 2024-01-01 plus a whole-day offset of 0..179.
start_date = pd.to_datetime('2024-01-01')
day_offsets = np.random.randint(0, 180, size=n_transactions)
transaction_dates = start_date + pd.to_timedelta(day_offsets, unit='D')

In [4]:
# Outcome of every transaction: 'success' with probability 0.85,
# 'failed' with probability 0.15.
status_labels = ['success', 'failed']
payment_status = np.random.choice(
    status_labels, size=n_transactions, p=[0.85, 0.15]
)

# Failure reason -> sampling probability.
failure_reason_weights = {
    'network_error': 0.35,
    'insufficient_funds': 0.30,
    'timeout': 0.20,
    'bank_decline': 0.15,
}

# A candidate reason is drawn for every row (successful ones included) and
# then masked, so the number of random draws does not depend on how many
# transactions failed.
candidate_reasons = np.random.choice(
    list(failure_reason_weights),
    size=n_transactions,
    p=list(failure_reason_weights.values()),
)

# Keep the reason only where the payment failed; other rows get None.
failed_mask = payment_status == 'failed'
failure_reasons = np.where(failed_mask, candidate_reasons, None)

In [5]:
# Number of retry attempts per transaction.
# Successful payments always get 0 retries; each failed payment draws a
# count from {0, 1, 2, 3} with probabilities 0.4 / 0.3 / 0.2 / 0.1.
#
# A single batched draw replaces the per-row Python loop that called
# np.random.choice once for every failed transaction.
# NOTE(review): one draw of size k is expected to consume the seeded global
# RandomState stream the same way as k scalar draws; diff the regenerated
# CSV against the previous one if byte-identical output matters.
is_failed = payment_status == 'failed'
n_failed = int(is_failed.sum())

retry_attempts = np.zeros(len(payment_status), dtype=int)
# Boolean-mask assignment fills the failed rows in their original order.
retry_attempts[is_failed] = np.random.choice(
    [0, 1, 2, 3], size=n_failed, p=[0.4, 0.3, 0.2, 0.1]
)

In [6]:
# Recovery flag per transaction (1 = recovered, 0 = not recovered).
# Only failed payments with at least one retry can be recovered; each of
# those is flagged 1 with probability 0.55 and 0 with probability 0.45.
# Every other row is 0.
#
# A single batched draw replaces the per-row Python loop that called
# np.random.choice once for every eligible transaction.
# NOTE(review): one draw of size k is expected to consume the seeded global
# RandomState stream the same way as k scalar draws; diff the regenerated
# CSV against the previous one if byte-identical output matters.
retried_after_failure = (payment_status == 'failed') & (retry_attempts > 0)
n_retried = int(retried_after_failure.sum())

recovered_flag = np.zeros(len(payment_status), dtype=int)
# Boolean-mask assignment fills the eligible rows in their original order.
recovered_flag[retried_after_failure] = np.random.choice(
    [1, 0], size=n_retried, p=[0.55, 0.45]
)

In [7]:
# Assemble the generated arrays into one table, one row per transaction.
# The dict's insertion order determines the column order of the frame.
transaction_columns = dict(
    transaction_id=transaction_ids,
    user_id=user_ids,
    transaction_date=transaction_dates,
    amount=amounts,
    payment_method=payment_methods,
    device=devices,
    payment_status=payment_status,
    failure_reason=failure_reasons,
    retry_count=retry_attempts,
    recovered_flag=recovered_flag,
)
transactions = pd.DataFrame(transaction_columns)

In [8]:
# Preview the first five rows as a sanity check on the generated columns.
transactions.head(n=5)

Unnamed: 0,transaction_id,user_id,transaction_date,amount,payment_method,device,payment_status,failure_reason,retry_count,recovered_flag
0,1,1860,2024-05-24,1324.34,Debit Card,iOS,success,,0,0
1,2,4772,2024-04-04,2237.72,Credit Card,iOS,success,,0,0
2,3,4092,2024-02-02,3190.09,Debit Card,Web,success,,0,0
3,4,1466,2024-02-25,940.45,Credit Card,Android,failed,network_error,0,0
4,5,5426,2024-05-04,3846.3,Credit Card,Web,success,,0,0


In [10]:
# Write the dataset to the working directory; the DataFrame index is left
# out of the file.
output_csv = 'raw_transactions.csv'
transactions.to_csv(output_csv, index=False)