In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Category pools the synthetic Kenyan M-Pesa columns are sampled from
locations = [
    "Nairobi", "Mombasa", "Kisumu", "Eldoret", "Nakuru",
    "Thika", "Garissa", "Machakos", "Nyeri", "Meru",
]
transaction_types = [
    "Send Money",
    "Buy Airtime",
    "Pay Bill",
    "Lipa na M-Pesa",
    "Withdraw Cash",
    "Deposit Cash",
]
device_types = ["Android", "iOS", "Feature Phone"]
network_providers = ["Safaricom", "Airtel", "Telkom Kenya"]
user_types = ["individual", "agent"]

# Number of synthetic transactions (rows) to generate
n = 10_000

# Seed NumPy's global RNG so the np.random draws that follow are repeatable
np.random.seed(42)

def random_date():
    """Return a random timestamp (whole-second resolution) within June 2024.

    The offset is drawn from NumPy's global RNG rather than the stdlib
    ``random`` module, so the value is governed by ``np.random.seed(...)``
    and the generated dataset is reproducible from run to run.

    Returns:
        datetime: a naive datetime ``d`` with
        ``datetime(2024, 6, 1) <= d < datetime(2024, 7, 1)``.
    """
    start = datetime(2024, 6, 1)
    # Exclusive upper bound: using 1 July (instead of midnight at the start of
    # 30 June) lets timestamps fall anywhere on the last day of the month too.
    end = datetime(2024, 7, 1)
    span_seconds = int((end - start).total_seconds())
    # np.random.randint samples from the half-open range [0, span_seconds);
    # int() guarantees a plain Python int, which timedelta requires.
    offset_seconds = int(np.random.randint(0, span_seconds))
    return start + timedelta(seconds=offset_seconds)

# Build the columns of the synthetic dataset. Entries are evaluated top to
# bottom, so the order of the np.random calls follows the key order below.
# NOTE(review): 'time_of_day' is sampled independently of 'datetime', so the
# label and the timestamp's hour can disagree within a row - confirm whether
# that is intended.
data = {
    "transaction_id": [f"TX{tx_number}" for tx_number in range(100000, 100000 + n)],
    # randint's upper bound is exclusive: numeric suffixes span 1000..9998
    "user_id": ["user_" + str(np.random.randint(1000, 9999)) for _ in range(n)],
    "transaction_type": np.random.choice(transaction_types, size=n),
    "amount": np.round(np.random.exponential(scale=3500, size=n), 2),
    "location": np.random.choice(locations, size=n),
    "device_type": np.random.choice(device_types, size=n),
    "network_provider": np.random.choice(network_providers, size=n),
    # 85% individuals, 15% agents
    "user_type": np.random.choice(user_types, size=n, p=[0.85, 0.15]),
    "time_of_day": np.random.choice(
        ["morning", "afternoon", "evening", "night"], size=n
    ),
    # Binary risk flags; p lists the probability of 0 first, then of 1
    "is_foreign_number": np.random.choice([0, 1], size=n, p=[0.97, 0.03]),
    "is_sim_recently_swapped": np.random.choice([0, 1], size=n, p=[0.95, 0.05]),
    "has_multiple_accounts": np.random.choice([0, 1], size=n, p=[0.9, 0.1]),
    "datetime": [random_date() for _row in range(n)],
}

# One row per transaction; preview the first five rows as the cell output
df_kenya = pd.DataFrame(data)
df_kenya.head()


Unnamed: 0,transaction_id,user_id,transaction_type,amount,location,device_type,network_provider,user_type,time_of_day,is_foreign_number,is_sim_recently_swapped,has_multiple_accounts,datetime
0,TX100000,user_8270,Withdraw Cash,2646.35,Nakuru,Feature Phone,Telkom Kenya,individual,night,0,0,0,2024-06-16 21:45:13
1,TX100001,user_1860,Send Money,2844.69,Garissa,iOS,Safaricom,agent,night,0,0,0,2024-06-05 00:49:25
2,TX100002,user_6390,Deposit Cash,2384.46,Nyeri,Feature Phone,Telkom Kenya,agent,afternoon,0,0,1,2024-06-13 15:54:02
3,TX100003,user_6191,Withdraw Cash,1846.01,Nairobi,iOS,Safaricom,individual,night,0,0,1,2024-06-10 01:05:49
4,TX100004,user_6734,Send Money,1017.6,Machakos,Feature Phone,Telkom Kenya,individual,morning,0,0,0,2024-06-27 02:28:53


In [None]:
# Persist the dataset. index=False keeps the positional RangeIndex out of the
# file; otherwise it is written as a header-less first column that comes back
# as "Unnamed: 0" when the CSV is read again. transaction_id already
# identifies each row.
df_kenya.to_csv('kenya_fraud_detection.csv', index=False)

In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Fix NumPy's global RNG so every run produces the same dataset
np.random.seed(42)

# Simulation parameters: dataset size plus the identifier / category pools
# that the transaction columns are sampled from
n_transactions = 10_000
user_ids = [f"U{i:04d}" for i in range(1, 1001)]    # U0001 .. U1000
device_ids = [f"D{i:03d}" for i in range(1, 201)]   # D001 .. D200
agent_ids = [f"A{i:03d}" for i in range(1, 301)]    # A001 .. A300
locations = [
    "Ikeja", "Yaba", "Ajah", "Kano",
    "Aba", "Makurdi", "Portharcout", "Owerri",
]
transaction_types = ["send", "receive", "cash_in", "cash_out"]
channels = ["USSD", "App", "Agent", "Web"]

# Timestamps: a random whole-minute offset within 180 days of 1 Jan 2024.
# These draws happen before any column draw, which fixes their place in the
# seeded random stream.
start_date = datetime(2024, 1, 1)
timestamps = [
    start_date + timedelta(minutes=np.random.randint(0, 60 * 24 * 180))
    for _ in range(n_transactions)
]

# Transaction columns; the dict is evaluated top to bottom, so the key order
# is also the order in which the RNG is consumed
data = {
    "transaction_id": [f"T{i:06d}" for i in range(n_transactions)],
    "user_id": np.random.choice(user_ids, size=n_transactions),
    "transaction_type": np.random.choice(
        transaction_types, size=n_transactions, p=[0.35, 0.35, 0.15, 0.15]
    ),
    "amount": np.random.exponential(scale=5000, size=n_transactions).round(2),
    "device_id": np.random.choice(device_ids, size=n_transactions),
    "location": np.random.choice(locations, size=n_transactions),
    "timestamp": timestamps,
    "channel": np.random.choice(
        channels, size=n_transactions, p=[0.5, 0.3, 0.15, 0.05]
    ),
    "agent_id": np.random.choice(agent_ids, size=n_transactions),
    # 2% of rows carry the SIM-swap flag
    "sim_swap_flag": np.random.choice([0, 1], size=n_transactions, p=[0.98, 0.02]),
}

df = pd.DataFrame(data)

# Derive balance fields.
# balance_before is an independent uniform draw per transaction, rounded to
# 2 decimals like the amount column.
df["balance_before"] = np.round(np.random.uniform(1000, 100000, size=n_transactions), 2)

# Credits ("receive", "cash_in") add the amount to the wallet; debits
# ("send", "cash_out") subtract it. Subtracting for every transaction type
# would make incoming money lower the balance.
df["balance_after"] = df["balance_before"] + df["amount"].where(
    df["transaction_type"].isin(["receive", "cash_in"]), -df["amount"]
)

# A debit at least as large as the balance leaves a non-positive result.
# Those rows get a small random residual balance in [0, 1000) instead, one
# uniform draw per affected row.
overdrawn = df["balance_after"] <= 0
df.loc[overdrawn, "balance_after"] = np.random.uniform(0, 1000, size=int(overdrawn.sum()))

# Keep balance_after at 2 decimals, consistent with amount / balance_before
# (avoids float noise such as 1234.5600000000002 in the CSV).
df["balance_after"] = df["balance_after"].round(2)


# Add transaction velocity: stands in for a user's past 1-hour tx count.
# NOTE(review): this is an independent Poisson(1.2) draw per row; it is not
# computed from the user_id / timestamp columns.
df["transaction_velocity"] = np.random.poisson(lam=1.2, size=n_transactions)

# Save to CSV (index=False: do not write the RangeIndex as a column)
df.to_csv('mobile_money_transactions.csv', index=False)
