In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Number of synthetic transactions the script generates.
n_samples = 5_000

# Probability with which generate_data marks a row as fraud at random,
# independently of the rule-based fraud condition.
fraud_ratio = 0.03

# Cities a transaction's location is sampled from. Order matters for
# reproducibility because entries are picked by random index.
locations = [
    'Mumbai',
    'Delhi',
    'Bangalore',
    'Hyderabad',
    'Chennai',
    'Kolkata',
    'Pune',
    'Ahmedabad',
]

def generate_data(n, seed=None, random_fraud_ratio=None, location_pool=None):
    """Generate ``n`` synthetic UPI-style transactions as a DataFrame.

    Every row receives a random timestamp inside calendar year 2024, an
    amount drawn uniformly from [10, 50000] (rounded to 2 decimals), a
    transaction type, a location, a device type, a rooted-device flag
    (1 with probability 0.05), a network type and a ``time_of_day`` bucket
    derived from the timestamp's hour.

    A row is labelled ``is_fraud = 1`` when either of these holds:

    * rule: amount > 25000, the device is rooted, the type is ``'send'``
      and the time of day is ``'night'`` or ``'evening'``;
    * noise: an independent random draw falls below ``random_fraud_ratio``.

    The overall fraud share is therefore at least the noise rate, not
    exactly equal to it.

    Args:
        n: Number of rows to generate. ``0`` yields an empty frame that
            still carries all columns.
        seed: Optional seed. When given, the function uses private random
            generators so that the same seed always reproduces the same
            frame and the global ``random`` / ``np.random`` states are left
            untouched. When ``None``, the global generators are used.
        random_fraud_ratio: Probability of labelling a row as fraud at
            random. Defaults to the module-level ``fraud_ratio``.
        location_pool: Sequence of locations to sample from. Defaults to
            the module-level ``locations``.

    Returns:
        pandas.DataFrame with columns ``transaction_id``, ``timestamp``,
        ``user_id``, ``receiver_id``, ``amount``, ``transaction_type``,
        ``location``, ``device_type``, ``is_rooted_device``,
        ``network_type``, ``time_of_day`` and ``is_fraud``.
    """
    if random_fraud_ratio is None:
        random_fraud_ratio = fraud_ratio
    if location_pool is None:
        location_pool = locations

    if seed is None:
        # Fall back to the shared global generators.
        rng, np_rng = random, np.random
    else:
        rng = random.Random(seed)
        # Derive the numpy seed from the stdlib generator so a single seed
        # value drives both random streams.
        np_rng = np.random.RandomState(rng.getrandbits(32))

    year_start = datetime(2024, 1, 1)
    # 2024 is a leap year. Derive the minute count from the calendar rather
    # than hard-coding 525600 (365 days), which left almost all of
    # 31 December unreachable.
    minutes_in_year = int(
        (datetime(2025, 1, 1) - year_start).total_seconds() // 60
    )
    user_pool_size = 5000

    columns = [
        'transaction_id', 'timestamp', 'user_id', 'receiver_id', 'amount',
        'transaction_type', 'location', 'device_type', 'is_rooted_device',
        'network_type', 'time_of_day', 'is_fraud',
    ]

    data = []
    for i in range(n):
        timestamp = year_start + timedelta(
            minutes=rng.randint(0, minutes_in_year - 1)
        )
        amount = round(rng.uniform(10, 50000), 2)
        transaction_type = rng.choice(['send', 'receive', 'merchant_payment'])
        location = rng.choice(location_pool)
        device_type = rng.choice(['mobile', 'tablet'])
        is_rooted_device = np_rng.choice([0, 1], p=[0.95, 0.05])
        network_type = rng.choice(['WiFi', '4G', '5G'])

        hour = timestamp.hour
        if hour < 6:
            time_of_day = 'night'
        elif hour < 12:
            time_of_day = 'morning'
        elif hour < 18:
            time_of_day = 'afternoon'
        else:
            time_of_day = 'evening'

        rule_hit = (
            amount > 25000
            and is_rooted_device == 1
            and transaction_type == 'send'
            and time_of_day in ('night', 'evening')
        )
        # Always draw the noise value, even when the rule already fired, so
        # the random stream consumed per row stays constant.
        noise_hit = rng.random() < random_fraud_ratio
        is_fraud = 1 if (rule_hit or noise_hit) else 0

        sender = rng.randint(1, user_pool_size)
        receiver = rng.randint(1, user_pool_size)
        # A transaction needs two distinct parties; redraw on a collision.
        while receiver == sender:
            receiver = rng.randint(1, user_pool_size)

        data.append([
            i + 1, timestamp, f"user_{sender}", f"user_{receiver}",
            amount, transaction_type, location, device_type, is_rooted_device,
            network_type, time_of_day, is_fraud,
        ])

    return pd.DataFrame(data, columns=columns)

# Build the synthetic dataset, then persist it as CSV without the
# DataFrame index column and report where it was written.
df = generate_data(n=n_samples)
df.to_csv(
    'upi_fraud_data.csv',
    index=False,
)
print("Saved as upi_fraud_data.csv")


Saved as upi_fraud_data.csv
