In [7]:
!pip install faker pandas scikit-learn

from faker import Faker
import pandas as pd
import random
from datetime import datetime, timedelta
import numpy as np

# One seed drives both Faker's shared generator and the stdlib `random` module.
SEED = 42

fake = Faker()
Faker.seed(SEED)
random.seed(SEED)

def generate_transaction(now=None):
    """Build one synthetic transaction record labelled as non-fraudulent.

    Args:
        now: Optional ``datetime`` used as the newest possible timestamp.
            The transaction is placed 0-1440 minutes before it. Defaults to
            ``datetime.now()`` at call time; pass a fixed value to get
            timestamps that are reproducible across runs.

    Returns:
        dict with keys ``transaction_id``, ``customer_id``, ``amount``,
        ``location``, ``timestamp``, ``device_id``, ``merchant_type`` and
        ``is_fraud`` (always 0 here).
    """
    if now is None:
        # Preserve the original behaviour when no reference time is supplied.
        now = datetime.now()

    return {
        "transaction_id": fake.uuid4(),
        "customer_id": fake.uuid4(),
        "amount": round(random.uniform(10, 10000), 2),
        "location": fake.country(),
        # Spread transactions over the 24 hours (1440 minutes) before `now`.
        "timestamp": now - timedelta(minutes=random.randint(0, 1440)),
        "device_id": fake.uuid4(),
        "merchant_type": random.choice(["Retail", "Online", "ATM", "Crypto", "Travel"]),
        "is_fraud": 0  # Default label
    }

# Generate 500 synthetic transactions and load them into a DataFrame
transactions = [generate_transaction() for _ in range(500)]
df = pd.DataFrame(transactions)
df.head()




Unnamed: 0,transaction_id,customer_id,amount,location,timestamp,device_id,merchant_type,is_fraud
0,bdd640fb-0667-4ad1-9c80-317fa3b1799d,23b8c1e9-3924-46de-beb1-3b9046685257,6397.87,Saint Pierre and Miquelon,2025-06-29 10:42:55.387517,e465e150-bd9c-46b3-ad3c-2d6d1a3d1fa7,ATM,0
1,6c031199-972a-4469-9641-9f828b9d2434,37f8a88b-17fc-495a-87a0-ca6e0822e8f3,2456.47,Djibouti,2025-06-29 06:48:55.387561,8fadc1a6-06cb-4fb3-9a1d-e644815ef6d1,Retail,0
2,b38a088c-a65e-4389-b74d-0fb132e70629,72ff5d2a-386e-4be0-ab65-a6a48b8148f6,6770.23,Namibia,2025-06-28 16:57:55.387590,01a9e71f-de8a-474b-8f36-d58b47378190,Retail,0
3,b2b9437a-28df-4ec4-8e4a-2bbdc241330b,27cd8130-4722-4389-971a-a8766c307511,5909.02,Cuba,2025-06-29 10:28:55.387616,1a2a73ed-562b-4f79-8374-59eef50bea63,Retail,0
4,5be6128e-18c2-4797-a142-ea7d17be3111,43b7a3a6-9a8d-4a03-980d-7b71d8f56413,946.02,Sri Lanka,2025-06-29 03:37:55.387641,89463e85-759c-4e66-bacf-b3d00b1f9163,Travel,0


In [8]:
# Turn 20 randomly chosen rows into synthetic fraud cases: a large amount,
# an early-morning hour (1-3), a flagged location and the "Crypto" merchant type.
fraud_rows = random.sample(range(500), 20)
for row in fraud_rows:
    # Draw the random values in a fixed order: amount, hour, location.
    fraud_amount = round(random.uniform(7000, 12000), 2)
    fraud_hour = random.choice([1, 2, 3])
    fraud_location = random.choice(["Nigeria", "Russia", "North Korea"])

    df.at[row, 'amount'] = fraud_amount
    df.at[row, 'timestamp'] = datetime.now().replace(hour=fraud_hour)
    df.at[row, 'location'] = fraud_location
    df.at[row, 'merchant_type'] = "Crypto"
    df.at[row, 'is_fraud'] = 1

# Hour of day is used by the odd-hour indicator further down.
df['hour'] = df['timestamp'].dt.hour
df.head(10)


Unnamed: 0,transaction_id,customer_id,amount,location,timestamp,device_id,merchant_type,is_fraud,hour
0,bdd640fb-0667-4ad1-9c80-317fa3b1799d,23b8c1e9-3924-46de-beb1-3b9046685257,6397.87,Saint Pierre and Miquelon,2025-06-29 10:42:55.387517,e465e150-bd9c-46b3-ad3c-2d6d1a3d1fa7,ATM,0,10
1,6c031199-972a-4469-9641-9f828b9d2434,37f8a88b-17fc-495a-87a0-ca6e0822e8f3,2456.47,Djibouti,2025-06-29 06:48:55.387561,8fadc1a6-06cb-4fb3-9a1d-e644815ef6d1,Retail,0,6
2,b38a088c-a65e-4389-b74d-0fb132e70629,72ff5d2a-386e-4be0-ab65-a6a48b8148f6,6770.23,Namibia,2025-06-28 16:57:55.387590,01a9e71f-de8a-474b-8f36-d58b47378190,Retail,0,16
3,b2b9437a-28df-4ec4-8e4a-2bbdc241330b,27cd8130-4722-4389-971a-a8766c307511,5909.02,Cuba,2025-06-29 10:28:55.387616,1a2a73ed-562b-4f79-8374-59eef50bea63,Retail,0,10
4,5be6128e-18c2-4797-a142-ea7d17be3111,43b7a3a6-9a8d-4a03-980d-7b71d8f56413,946.02,Sri Lanka,2025-06-29 03:37:55.387641,89463e85-759c-4e66-bacf-b3d00b1f9163,Travel,0,3
5,60e7a113-ec1b-4ca1-b91e-1d4c1ff49b78,d453dd32-4b0d-4b41-8d52-88f1142c3fe8,6024.17,Niue,2025-06-28 16:24:55.387666,5c941cf0-dc98-42c1-a2ac-f72f9e574f7a,Online,0,16
6,11ce5dd2-b45e-41f0-b139-d32c93cd59bf,c5e7ce8a-3a57-4a8e-a948-8d990bbb2599,7163.04,French Guiana,2025-06-28 11:37:55.387690,3b982ef8-daf6-4a26-946d-3f31fc377a4c,Travel,0,11
7,47294739-614f-43d7-99db-3ad0ddd1dfb2,5d65a441-d588-42de-a2bc-372f7412b293,4201.0,Central African Republic,2025-06-28 20:14:55.387716,ab9099a4-35a2-40ae-9af3-05535ec42e08,Travel,0,20
8,aefcfad8-efc8-4849-b3aa-7efe4458a885,a28defe3-9bf0-4273-9247-6f57a5e5a5ab,2789.13,Chile,2025-06-29 11:20:55.387741,29d4beef-3eab-4dcb-baa8-0dd488bd6407,Online,0,11
9,fd5166e6-451b-4cf3-a123-fdf77656af72,8e944239-b02b-41c4-a3d7-0628ece66fa2,6984.41,Cyprus,2025-06-28 23:57:55.387766,c4b032cc-d7c5-44a5-9304-317faf42e12f,ATM,0,23


In [9]:
# Derive four boolean fraud indicators from the raw columns
flagged_locations = ["Nigeria", "Russia", "North Korea"]

df['amount_over_5000'] = df['amount'].gt(5000)
df['odd_hour'] = df['hour'].isin(range(5))  # hours 0 through 4
df['suspicious_location'] = df['location'].isin(flagged_locations)
df['crypto_use'] = df['merchant_type'].eq("Crypto")

# Columns fed to the model, in this order
features = ['amount_over_5000', 'odd_hour', 'suspicious_location', 'crypto_use']
df[[*features, 'is_fraud']].head()


Unnamed: 0,amount_over_5000,odd_hour,suspicious_location,crypto_use,is_fraud
0,True,False,False,False,0
1,False,False,False,False,0
2,True,False,False,False,0
3,True,False,False,False,0
4,False,True,False,False,0


In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Feature matrix and target label
y = df['is_fraud']
X = df[features]

# Stratified 80/20 split so both partitions keep the fraud/non-fraud ratio
split = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit a 100-tree random forest on the training partition
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Report per-class precision/recall on the held-out partition
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        96
           1       1.00      1.00      1.00         4

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100



In [11]:
# Simulating new incoming transactions (mini real-time stream)
def simulate_new_transaction():
    """Generate one fresh transaction and score it with the fitted model.

    Calls ``generate_transaction()``, derives the four boolean indicators
    listed in ``features`` and asks ``model`` for a fraud probability.

    Returns:
        dict: the generated transaction extended with the four indicator
        flags, ``fraud_probability`` (second column of ``predict_proba``,
        rounded to 2 decimals) and ``flagged_as_fraud`` (True when the
        unrounded probability exceeds 0.5).
    """
    tx = generate_transaction()
    hour = tx['timestamp'].hour
    tx['amount_over_5000'] = tx['amount'] > 5000
    tx['odd_hour'] = hour in [0,1,2,3,4]
    tx['suspicious_location'] = tx['location'] in ["Russia", "Nigeria", "North Korea"]
    tx['crypto_use'] = tx['merchant_type'] == "Crypto"

    # The model was fitted on a DataFrame with named columns. Passing a bare
    # nested list makes scikit-learn warn about missing feature names and
    # relies on a hand-maintained positional order. Build a one-row frame keyed
    # by `features` so names and order always match the training data.
    tx_features = pd.DataFrame([{name: tx[name] for name in features}])

    fraud_prob = model.predict_proba(tx_features)[0][1]
    tx['fraud_probability'] = round(fraud_prob, 2)
    tx['flagged_as_fraud'] = fraud_prob > 0.5
    return tx

# Score 10 freshly simulated transactions and show the key columns
new_txs = [simulate_new_transaction() for _ in range(10)]
summary_columns = [
    'transaction_id',
    'amount',
    'location',
    'merchant_type',
    'fraud_probability',
    'flagged_as_fraud',
]
pd.DataFrame(new_txs)[summary_columns]




Unnamed: 0,transaction_id,amount,location,merchant_type,fraud_probability,flagged_as_fraud
0,b9ff2c29-57d9-4bb0-b09e-47bad0e656e5,780.88,Northern Mariana Islands,Travel,0.0,False
1,4cd99164-81df-42f4-9bc6-77caabff9286,7711.04,Turkey,Retail,0.0,False
2,423701f1-bdac-4c5e-a0c0-f2757885187d,6622.67,United States Minor Outlying Islands,Online,0.0,False
3,38e59b27-16dc-4a6e-a789-077055ec024a,5976.49,Gambia,Online,0.0,False
4,4b46ef33-95e7-4614-82b4-02815f269466,1816.65,Austria,Travel,0.0,False
5,78e36397-123b-41cf-9ee3-39fff1bdb598,8092.35,Trinidad and Tobago,Crypto,0.0,False
6,daaf7529-deef-482a-8dd7-015869c165ae,446.49,Guam,Online,0.0,False
7,24aa5685-7523-46ce-bc95-0bbae8184f34,9696.19,Slovakia (Slovak Republic),ATM,0.0,False
8,a26e865b-503b-4788-ad44-5af8fc9476b1,7526.39,Cote d'Ivoire,Online,0.0,False
9,c90394ea-7d54-4c82-a88f-3435c3b534db,5344.77,Nicaragua,Crypto,0.0,False


In [12]:
# Write the full dataset (raw columns plus engineered indicators) to CSV for the dashboard
output_path = "all_transactions_with_flags.csv"
df.to_csv(output_path, index=False)
print(f"Saved as {output_path}")


Saved as all_transactions_with_flags.csv
