In [5]:
import pandas as pd

# Accumulator for the report: each fraud indicator below adds one dict per flagged transaction
flagged_data = []

# Read the cleaned transaction data from disk (change the path to match your environment)
df = pd.read_csv(filepath_or_buffer='cleaned_digital_wallet_transactions.csv')

# Fraud Indicator 1: High Transaction Amounts with Statistical Threshold
#
# A transaction is flagged when its amount lies more than three standard
# deviations above the mean amount of the whole dataset.
threshold_amount = 5000  # NOTE(review): unused by the 3-sigma rule below; kept in case other cells read it
mean_amount = df['product_amount'].mean()
std_amount = df['product_amount'].std()
upper_limit = mean_amount + 3 * std_amount  # 3-sigma threshold

high_transaction_flag = df['product_amount'] > upper_limit

high_value_transactions = df.loc[high_transaction_flag]
# The reason text depends only on dataset-wide statistics, so build it once
high_value_reason = f"Exceeds 3-sigma threshold (Mean: {mean_amount:.2f}, Std Dev: {std_amount:.2f})"

# Dates are parsed here so the report's "Date" column holds timestamps for this
# indicator too; the later indicators read the column after it has been converted
# with pd.to_datetime, and mixing raw strings with timestamps makes the column inconsistent.
flagged_data.extend(
    {
        "Indicator": "High Transaction Amount",
        "User ID": user_id,
        "Transaction ID": transaction_id,
        "Amount": amount,
        "Date": transaction_date,
        "Reason": high_value_reason,
    }
    for user_id, transaction_id, amount, transaction_date in zip(
        high_value_transactions['user_id'],
        high_value_transactions['transaction_id'],
        high_value_transactions['product_amount'],
        pd.to_datetime(high_value_transactions['transaction_date']),
    )
)

# Fraud Indicator 2: Frequent Transactions in Short Time Periods
#
# Order each user's transactions chronologically, measure the gap to that
# user's previous transaction, and flag any gap shorter than 10 minutes.
df['transaction_date'] = pd.to_datetime(df['transaction_date'])
df = df.sort_values(by=['user_id', 'transaction_date'])

gap_since_previous = df.groupby('user_id')['transaction_date'].diff()
df['time_diff'] = gap_since_previous.dt.total_seconds() / 60  # in minutes
# A user's first transaction has no predecessor (NaN gap) and is never flagged
frequent_transaction_flag = df['time_diff'].lt(10)

rapid_transactions = df.loc[
    frequent_transaction_flag,
    ['user_id', 'transaction_id', 'product_amount', 'transaction_date', 'time_diff'],
]
flagged_data.extend(
    {
        "Indicator": "Frequent Transactions",
        "User ID": user_id,
        "Transaction ID": transaction_id,
        "Amount": amount,
        "Date": transaction_date,
        "Reason": f"Consecutive transaction within {int(gap_minutes)} minutes",
    }
    for user_id, transaction_id, amount, transaction_date, gap_minutes
    in rapid_transactions.itertuples(index=False, name=None)
)

# Fraud Indicator 3: Unusual Location for User's Transactions
def _usual_location(locations):
    """Return the most frequent non-missing value in *locations*, or NaN if there is none.

    ``Series.mode()`` comes back empty when every value is missing, so indexing
    it with ``[0]`` would raise. Returning NaN lets such users pass through
    instead of aborting the whole analysis. When several values tie, the first
    entry of ``Series.mode()`` is used.
    """
    modes = locations.mode()
    return modes.iat[0] if not modes.empty else float('nan')


df['most_common_location'] = df.groupby('user_id')['location'].transform(_usual_location)

# A user with no recorded location has no baseline to deviate from, so only
# rows with a known usual location can be flagged as a mismatch.
has_usual_location = df['most_common_location'].notna()
unusual_location_flag = has_usual_location & (df['location'] != df['most_common_location'])

unusual_location_rows = df.loc[
    unusual_location_flag,
    ['user_id', 'transaction_id', 'location', 'transaction_date', 'most_common_location'],
]
flagged_data.extend(
    {
        "Indicator": "Unusual Location",
        "User ID": user_id,
        "Transaction ID": transaction_id,
        "Location": location,
        "Date": transaction_date,
        "Reason": f"Location mismatch (Usual: {usual_location})",
    }
    for user_id, transaction_id, location, transaction_date, usual_location
    in unusual_location_rows.itertuples(index=False, name=None)
)

# Fraud Indicator 4: Irregular Product Category Purchases
def _usual_category(categories):
    """Return the most frequent non-missing value in *categories*, or NaN if there is none.

    ``Series.mode()`` comes back empty when every value is missing, so indexing
    it with ``[0]`` would raise. Returning NaN lets such users pass through
    instead of aborting the whole analysis. When several values tie, the first
    entry of ``Series.mode()`` is used.
    """
    modes = categories.mode()
    return modes.iat[0] if not modes.empty else float('nan')


df['usual_product_category'] = df.groupby('user_id')['product_category'].transform(_usual_category)

# A user with no recorded category has no baseline to deviate from, so only
# rows with a known usual category can be flagged as irregular.
has_usual_category = df['usual_product_category'].notna()
irregular_product_flag = has_usual_category & (df['product_category'] != df['usual_product_category'])

irregular_category_rows = df.loc[
    irregular_product_flag,
    ['user_id', 'transaction_id', 'product_category', 'transaction_date', 'usual_product_category'],
]
flagged_data.extend(
    {
        "Indicator": "Irregular Product Category",
        "User ID": user_id,
        "Transaction ID": transaction_id,
        "Product Category": product_category,
        "Date": transaction_date,
        "Reason": f"Outside usual category (Usual: {usual_category})",
    }
    for user_id, transaction_id, product_category, transaction_date, usual_category
    in irregular_category_rows.itertuples(index=False, name=None)
)

# Fraud Indicator 5: Multiple Payment Methods for a Single User
#
# Count the distinct payment methods per user; every transaction belonging to
# a user who has used more than one method is flagged.
payment_method_count = df.groupby('user_id')['payment_method'].nunique()
multiple_payment_method_users = payment_method_count.loc[payment_method_count.gt(1)].index
multiple_payment_method_flag = df['user_id'].isin(multiple_payment_method_users)

multi_method_rows = df.loc[
    multiple_payment_method_flag,
    ['user_id', 'transaction_id', 'payment_method', 'transaction_date'],
]
flagged_data.extend(
    {
        "Indicator": "Multiple Payment Methods",
        "User ID": user_id,
        "Transaction ID": transaction_id,
        "Payment Method": payment_method,
        "Date": transaction_date,
        "Reason": "Multiple payment methods detected",
    }
    for user_id, transaction_id, payment_method, transaction_date
    in multi_method_rows.itertuples(index=False, name=None)
)

# Compile every flagged record into one DataFrame and export it as the report
REPORT_PATH = 'fraud_detection_report.csv'

# Same order the columns appear in when all five indicators flag something
REPORT_COLUMNS = [
    "Indicator",
    "User ID",
    "Transaction ID",
    "Amount",
    "Date",
    "Reason",
    "Location",
    "Product Category",
    "Payment Method",
]

flagged_df = pd.DataFrame(flagged_data)

# Pin the schema: without this, a run that flags nothing writes a header-less
# file that pd.read_csv cannot parse, and the column set varies with which
# indicators fired. Unknown keys are kept at the end rather than dropped.
extra_columns = [column for column in flagged_df.columns if column not in REPORT_COLUMNS]
flagged_df = flagged_df.reindex(columns=REPORT_COLUMNS + extra_columns)

# Save the compiled flagged data to a CSV file
flagged_df.to_csv(REPORT_PATH, index=False)

print(f"Fraud detection report has been saved to '{REPORT_PATH}'")


Fraud detection report has been saved to 'fraud_detection_report.csv'
