In [None]:
import pandas as pd
import numpy as np

# How many synthetic transaction rows the notebook will produce
num_transactions = 10_000

# Fix NumPy's global RNG so every run yields the same dataset
np.random.seed(42)

In [None]:
# Pick one of the four transaction type labels uniformly at random per row
transaction_type_labels = ['CASH_IN', 'CASH_OUT', 'DEBIT', 'PAYMENT']
transaction_types = np.random.choice(
    transaction_type_labels,
    size=num_transactions,
)

In [None]:
# Draw amounts from a normal distribution (mean 5000, std 2000);
# negative draws are floored at zero so no amount is below 0.
raw_transaction_amounts = np.random.normal(5000, 2000, num_transactions)
transaction_amounts = raw_transaction_amounts.clip(min=0)

In [None]:
# Pre-transaction balances: normal draws (mean 50000, std 20000),
# with anything below zero raised to 0 in the same expression.
starting_balances = np.clip(
    np.random.normal(loc=50000, scale=20000, size=num_transactions),
    a_min=0,
    a_max=None,
)

In [None]:
# Calculate ending balances after transaction.
# Outflows (CASH_OUT, DEBIT) reduce the balance and inflows (CASH_IN, PAYMENT)
# increase it; a row with any other type keeps its starting balance.
# Boolean masks replace the per-row Python loop with vectorized NumPy updates
# that produce the same values.
# NOTE: no overdraft check is applied, so an outflow larger than the starting
# balance yields a negative ending balance.
outflow_mask = np.isin(transaction_types, ['CASH_OUT', 'DEBIT'])
inflow_mask = np.isin(transaction_types, ['CASH_IN', 'PAYMENT'])

ending_balances = starting_balances.copy()  # leave starting_balances untouched
ending_balances[outflow_mask] -= transaction_amounts[outflow_mask]
ending_balances[inflow_mask] += transaction_amounts[inflow_mask]

In [None]:
# Destination account numbers exist only for CASH_OUT rows; all other rows get None.
# A candidate number is drawn for every row (randint's upper bound is exclusive),
# then np.where keeps it only where the row is a cash-out.
candidate_destination_accounts = np.random.randint(1000000, 9999999, size=num_transactions)
is_cash_out = transaction_types == 'CASH_OUT'
destination_accounts = np.where(is_cash_out, candidate_destination_accounts, None)

# Fraud flag: 1 with probability 0.05, 0 with probability 0.95.
# The flag is sampled on its own and does not look at any other column.
fraud_flag_values = [0, 1]
fraud_flag_probabilities = [0.95, 0.05]
is_fraudulent = np.random.choice(
    fraud_flag_values,
    size=num_transactions,
    p=fraud_flag_probabilities,
)

In [None]:
# Assemble the generated arrays into named columns (order below is the column order)
data = dict(
    transaction_type=transaction_types,
    amount=transaction_amounts,
    starting_balance=starting_balances,
    ending_balance=ending_balances,
    destination_account=destination_accounts,
    is_fraudulent=is_fraudulent,
)

# One row per generated transaction
df = pd.DataFrame(data)

In [None]:
# Print the first five rows as a quick sanity check of the generated columns
print(df.head(n=5))

  transaction_type       amount  starting_balance  ending_balance  \
0            DEBIT  3806.051427          0.000000    -3806.051427   
1          PAYMENT   219.391265      39537.913862    39757.305127   
2          CASH_IN  4175.558510      32668.588911    36844.147421   
3            DEBIT  6826.947464      56980.245373    50153.297909   
4            DEBIT  6075.259806     115714.474274   109639.214468   

  destination_account  is_fraudulent  
0                None              0  
1                None              0  
2                None              0  
3                None              0  
4                None              0  


In [None]:
# Print the final five rows to confirm the frame is populated through the end
print(df.tail(n=5))

     transaction_type       amount  starting_balance  ending_balance  \
9995          PAYMENT  3430.866015      27559.336588    30990.202602   
9996            DEBIT  7416.646572      42691.645578    35274.999006   
9997         CASH_OUT  5489.569239      17939.210311    12449.641072   
9998         CASH_OUT  6224.313614      22354.281917    16129.968303   
9999          CASH_IN  3202.252378      70842.797853    74045.050231   

     destination_account  is_fraudulent  
9995                None              0  
9996                None              0  
9997             4225012              0  
9998             5142900              0  
9999                None              1  


In [None]:
# Write the dataset to CSV in the working directory; index=False omits the row index column
df.to_csv(path_or_buf='financial_transactions.csv', index=False)