<a href="https://colab.research.google.com/github/nour-ezzehi/-Transaction-Fraud-Detection-System/blob/main/Transaction_Fraud_Detection_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
%pip install Faker
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import xgboost as xgb
from sklearn.metrics import classification_report

fake = Faker()
random.seed(42)

n_transactions = 1000
user_ids = [fake.uuid4() for _ in range(n_transactions)]
timestamps = [fake.date_time_this_year() for _ in range(n_transactions)]
merchants = [fake.company() for _ in range(n_transactions)]
locations = [fake.country() for _ in range(n_transactions)]
amounts = [round(random.uniform(5.0, 1000.0), 2) for _ in range(n_transactions)]

fraudulent = [1 if random.random() < 0.05 else 0 for _ in range(n_transactions)]

data = {
    'user_id': user_ids,
    'timestamp': timestamps,
    'merchant': merchants,
    'location': locations,
    'amount': amounts,
    'fraudulent': fraudulent
}
df = pd.DataFrame(data)

df['timestamp'] = pd.to_datetime(df['timestamp'])

df['transaction_velocity'] = df.groupby('user_id')['timestamp'].diff().fillna(pd.Timedelta(seconds=0))

df['transaction_velocity'] = df['transaction_velocity'].dt.total_seconds()

df['geolocation_inconsistency'] = df.groupby('user_id')['location'].shift() != df['location']
df['geolocation_inconsistency'] = df['geolocation_inconsistency'].fillna(False).astype(int)

df = df.drop(columns=['user_id', 'merchant', 'timestamp'])

X = df.drop(columns=['fraudulent'])
y = df['fraudulent']

X = pd.get_dummies(X, columns=['location'])

X.fillna(0, inplace=True)

from imblearn.over_sampling import SMOTE
smote = SMOTE(sampling_strategy='minority')
X_res, y_res = smote.fit_resample(X, y)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

import xgboost as xgb
model = xgb.XGBClassifier(eval_metric="mlogloss", use_label_encoder=False)
model.fit(X_train_scaled, y_train)

from sklearn.metrics import classification_report
y_pred = model.predict(X_test_scaled)

print(classification_report(y_test, y_pred))

from sklearn.metrics import precision_recall_curve, auc
precision, recall, _ = precision_recall_curve(y_test, model.predict_proba(X_test_scaled)[:, 1])
pr_auc = auc(recall, precision)
print(f"Precision-Recall AUC: {pr_auc}")
model.save_model('fraud_detection_model.json')
np.save('scaler_mean.npy', scaler.mean_)
np.save('scaler_scale.npy', scaler.scale_)


              precision    recall  f1-score   support

           0       0.97      0.97      0.97       196
           1       0.97      0.97      0.97       187

    accuracy                           0.97       383
   macro avg       0.97      0.97      0.97       383
weighted avg       0.97      0.97      0.97       383

Precision-Recall AUC: 0.9980956244856721


In [40]:
%pip install Fastapi uvicorn

Collecting uvicorn
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Downloading uvicorn-0.34.2-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn
Successfully installed uvicorn-0.34.2


In [43]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

model = xgb.XGBClassifier(eval_metric="mlogloss", use_label_encoder=False)
model.load_model('fraud_detection_model.json')

scaler = StandardScaler()
scaler.mean_ = np.load('scaler_mean.npy')
scaler.scale_ = np.load('scaler_scale.npy')

app = FastAPI()

class Transaction(BaseModel):
    amount: float
    transaction_velocity: float
    geolocation_inconsistency: int
    location: str

@app.post("/predict/")
async def predict(transaction: Transaction):
    input_data = pd.DataFrame([{
        'amount': transaction.amount,
        'transaction_velocity': transaction.transaction_velocity,
        'geolocation_inconsistency': transaction.geolocation_inconsistency,
        'location': transaction.location,
    }])

    input_data = pd.get_dummies(input_data, columns=['location'])

    expected_columns = ['amount', 'transaction_velocity', 'geolocation_inconsistency'] + list(scaler.get_feature_names_out(input_data.columns))
    for col in expected_columns:
        if col not in input_data.columns:
            input_data[col] = 0
    input_scaled = scaler.transform(input_data)

    prediction = model.predict(input_scaled)

    if prediction[0] == 1:
        return {"prediction": "fraudulent"}
    else:
        return {"prediction": "non-fraudulent"}