In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Categorical value pools sampled when generating the synthetic M-Pesa transactions.

# Kenyan locations used for the `location` column.
locations = [
    'Nairobi', 'Mombasa', 'Kisumu', 'Eldoret', 'Nakuru',
    'Thika', 'Garissa', 'Machakos', 'Nyeri', 'Meru',
]

# M-Pesa specific transaction types.
transaction_types = [
    'Send Money',
    'Buy Airtime',
    'Pay Bill',
    'Lipa na M-Pesa',
    'Withdraw Cash',
    'Deposit Cash',
]

# Handset categories, mobile network operators, and account kinds.
device_types = ['Android', 'iOS', 'Feature Phone']
network_providers = ['Safaricom', 'Airtel', 'Telkom Kenya']
user_types = ['individual', 'agent']

# Generate synthetic dataset
n = 5000  # number of synthetic transactions (rows) to generate

# Seed BOTH random sources used in this notebook. NumPy drives the categorical,
# flag and amount columns, while random_date() draws from the stdlib `random`
# module. Seeding only NumPy leaves the `datetime` column different on every run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

def random_date():
    """Return a random timestamp from 2024-06-01 00:00:00 to 2024-06-30 00:00:00.

    A whole-second offset is drawn uniformly (both endpoints inclusive) with the
    stdlib ``random`` module, so results are governed by ``random.seed()`` and
    not by ``np.random.seed()``.

    Returns:
        datetime: ``window_start`` plus the sampled number of seconds.
    """
    window_start = datetime(2024, 6, 1)
    # NOTE(review): the window closes at midnight on 30 June, so the remainder of
    # that day is never sampled -- confirm this is intended.
    window_end = datetime(2024, 6, 30)

    span_seconds = int((window_end - window_start).total_seconds())
    offset_seconds = random.randint(0, span_seconds)
    return window_start + timedelta(seconds=offset_seconds)

# Build the dataset one column at a time. The NumPy draws below execute in the
# order written and share one global random stream, so reordering them changes
# the values that every later column receives.
# NOTE(review): `time_of_day` is sampled independently of `datetime`, so the
# label need not agree with the timestamp's hour -- confirm this is intended.
data = dict(
    # Sequential ids: TX100000, TX100001, ...
    transaction_id=[f"TX{tx_number}" for tx_number in range(100000, 100000 + n)],
    # One scalar draw per row; randint's upper bound is exclusive, and ids may repeat.
    user_id=[f"user_{np.random.randint(1000, 9999)}" for _ in range(n)],
    transaction_type=np.random.choice(transaction_types, size=n),
    # Exponentially distributed amounts (scale 3500), rounded to 2 decimals.
    amount=np.random.exponential(scale=3500, size=n).round(2),
    location=np.random.choice(locations, size=n),
    device_type=np.random.choice(device_types, size=n),
    network_provider=np.random.choice(network_providers, size=n),
    # Weighted 85% / 15% in the order given by `user_types`.
    user_type=np.random.choice(user_types, size=n, p=[0.85, 0.15]),
    time_of_day=np.random.choice(['morning', 'afternoon', 'evening', 'night'], size=n),
    # Binary flags; the second probability is the chance of drawing 1.
    is_foreign_number=np.random.choice([0, 1], size=n, p=[0.97, 0.03]),
    is_sim_recently_swapped=np.random.choice([0, 1], size=n, p=[0.95, 0.05]),
    has_multiple_accounts=np.random.choice([0, 1], size=n, p=[0.9, 0.1]),
    # Timestamps come from random_date(), one call per row.
    datetime=[random_date() for _ in range(n)],
)

# Column order follows the insertion order of `data`.
df_kenya = pd.DataFrame(data)
df_kenya.head()


Unnamed: 0,transaction_id,user_id,transaction_type,amount,location,device_type,network_provider,user_type,time_of_day,is_foreign_number,is_sim_recently_swapped,has_multiple_accounts,datetime
0,TX100000,user_8270,Withdraw Cash,2359.83,Kisumu,Android,Safaricom,individual,evening,0,0,0,2024-06-26 16:58:16
1,TX100001,user_1860,Lipa na M-Pesa,1042.99,Eldoret,iOS,Telkom Kenya,agent,morning,0,0,0,2024-06-10 04:15:22
2,TX100002,user_6390,Withdraw Cash,7567.65,Meru,Feature Phone,Safaricom,agent,afternoon,0,0,0,2024-06-02 23:48:44
3,TX100003,user_6191,Lipa na M-Pesa,3011.51,Nakuru,Android,Telkom Kenya,individual,evening,0,1,0,2024-06-06 00:18:47
4,TX100004,user_6734,Withdraw Cash,874.77,Nakuru,Feature Phone,Airtel,individual,night,0,0,0,2024-06-11 15:16:08


In [3]:
# Persist the synthetic dataset. index=False omits the default RangeIndex, which
# would otherwise be written as an unnamed first column and come back as
# "Unnamed: 0" when the file is loaded with pd.read_csv.
df_kenya.to_csv('kenya_fraud_detection.csv', index=False)