In [3]:
import pandas as pd
import numpy as np

In [6]:
# Refine the fraud detection logic by incorporating extracted criteria

# Load the cleaned transactions and convert the timestamp column to real
# datetimes in the same chained step, so the per-user time arithmetic
# further down operates on datetime values rather than raw strings.
cleaned_data = pd.read_csv('cleaned_digital_wallet_transactions.csv').assign(
    transaction_date=lambda frame: pd.to_datetime(frame['transaction_date'])
)

# Extract features for fraud detection based on the combined criteria
# 1. High Transaction Amounts (Statistical and Absolute Threshold)
# A transaction is treated as "high" when it exceeds EITHER limit:
#   * the fixed absolute ceiling (threshold_amount), or
#   * the dataset-wide 3-sigma upper bound (mean + 3 * std).
threshold_amount = 5000  # Absolute threshold for high transaction amounts
mean_amount = cleaned_data['product_amount'].mean()
std_amount = cleaned_data['product_amount'].std()
upper_limit = mean_amount + 3 * std_amount  # 3-sigma threshold

# threshold_amount used to be defined but never applied, so only the
# statistical test took effect. Both tests are now combined. If std() is
# NaN (e.g. a single-row dataset) the 3-sigma comparison is False for
# every row and the absolute ceiling alone decides the flag.
cleaned_data['high_transaction_flag'] = (
    (cleaned_data['product_amount'] > upper_limit)
    | (cleaned_data['product_amount'] > threshold_amount)
)

# 2. Frequent Transactions in Short Time Periods
frequent_transaction_window = 15  # Extended window for broader criteria

# Order each user's transactions chronologically so that diff() measures
# the gap to that same user's immediately preceding transaction.
cleaned_data = cleaned_data.sort_values(by=['user_id', 'transaction_date'])
gap_since_previous = cleaned_data.groupby('user_id')['transaction_date'].diff()
cleaned_data['time_diff'] = gap_since_previous.dt.total_seconds() / 60  # in minutes

# A user's first transaction has no predecessor, so its time_diff is NaN
# and the comparison below evaluates to False for it.
cleaned_data['frequent_transaction_flag'] = cleaned_data['time_diff'].lt(
    frequent_transaction_window
)

# 3. Unusual Location for User's Transactions
# Each row receives its user's most frequent location. mode() ignores
# missing values, so a user whose locations are all missing yields an empty
# result; indexing that with [0] used to raise. Such users now get NaN as
# their most common location instead of aborting the whole run.
cleaned_data['most_common_location'] = cleaned_data.groupby('user_id')['location'].transform(
    lambda values: values.mode().iloc[0] if values.notna().any() else np.nan
)

# NOTE: a missing value compares unequal to everything (including another
# missing value), so rows with a missing location are flagged as unusual.
cleaned_data['unusual_location_flag'] = cleaned_data['location'] != cleaned_data['most_common_location']

# 4. Irregular Product Category Purchases
# Each row receives its user's most frequent product category. mode()
# ignores missing values, so a user whose categories are all missing yields
# an empty result; indexing that with [0] used to raise. Such users now get
# NaN as their usual category instead of aborting the whole run.
cleaned_data['usual_product_category'] = cleaned_data.groupby('user_id')['product_category'].transform(
    lambda values: values.mode().iloc[0] if values.notna().any() else np.nan
)

# NOTE: a missing value compares unequal to everything (including another
# missing value), so rows with a missing category are flagged as irregular.
cleaned_data['irregular_product_flag'] = cleaned_data['product_category'] != cleaned_data['usual_product_category']

# 5. Multiple Payment Methods for a Single User
# Count the distinct payment methods seen per user, keep the users with
# more than one, then mark every transaction belonging to those users.
payment_method_count = cleaned_data.groupby('user_id')['payment_method'].nunique()
multiple_payment_method_users = payment_method_count.loc[payment_method_count.gt(1)].index
cleaned_data['multiple_payment_method_flag'] = cleaned_data['user_id'].isin(
    multiple_payment_method_users
)

# Combine all refined fraud flags for a comprehensive fraud detection report
# A transaction is flagged when at least one individual criterion fires;
# the flags are OR-ed together left to right in the order listed here.
flag_columns = [
    'high_transaction_flag',
    'frequent_transaction_flag',
    'unusual_location_flag',
    'irregular_product_flag',
    'multiple_payment_method_flag',
]
combined = cleaned_data[flag_columns[0]]
for column in flag_columns[1:]:
    combined = combined | cleaned_data[column]
cleaned_data['nested_fraud_flag'] = combined

# Keep only the rows whose combined fraud flag is set and write them to a
# CSV (without the index column) for later review.
flagged_transactions_path = 'refined_flagged_transactions.csv'
flagged_transactions = cleaned_data.loc[cleaned_data['nested_fraud_flag']]
flagged_transactions.to_csv(flagged_transactions_path, index=False)

# Trailing bare expression so the notebook cell displays the output path.
flagged_transactions_path


'refined_flagged_transactions.csv'