In [None]:
%%writefile readme.md


# FraudGuard: Real-Time Transaction Fraud Detector

**A production-ready, explainable AI system that detects fraudulent transactions in real time — and tells you *why*.**

Built with Python, XGBoost, SHAP, and Gradio, **FraudGuard** combines supervised and unsupervised learning to deliver accurate, transparent, and instant fraud predictions. Perfect for fintech, payment gateways, or any transaction-based platform.


##  Why This Stands Out

- **End-to-End ML Pipeline** — From synthetic data generation to real-time inference
- **Explainable AI (XAI)** — SHAP-powered waterfall plots show *exactly why* a transaction was flagged
- **Dual-Model Intelligence** — Combines XGBoost (supervised) + Isolation Forest (anomaly detection)
- **Production-Ready** — Modular code, model persistence, and Gradio UI for instant interaction
- **Hiring-Ready Project** — Demonstrates skills in ML, engineering, and business impact

 *This isn't just a notebook — it's a deployable fraud detection service.*

---

##  How It Works

### 1. **Synthetic Data Engine**
Generates realistic transaction data with:
- Log-normal amounts
- Risky countries (Nigeria, Russia, China)
- High-risk categories (Crypto, Gambling)
- Fraud labels sampled from an additive risk score (about 32% positives with the default risk weights)

### 2. **Dual-Model Detection**
| Model | Type | Role |
|------|------|------|
| **XGBoost Classifier** | Supervised | Predicts fraud probability |
| **Isolation Forest** | Unsupervised | Detects anomalous behavior |

### 3. **Explainability with SHAP**
Every prediction comes with a **waterfall plot** showing:
- Which features pushed the decision toward fraud
- How much each feature contributed to the model's fraud score
- Transparency for compliance & trust

### 4. **Real-Time Gradio App**
Interactive web interface where users can:
- Input transaction details
- Get instant risk assessment
- See model reasoning (no black box!)

---

##  Sample Output


Overwriting readme.md


In [None]:
%%writefile requirements.txt
pandas
numpy
scikit-learn
xgboost
shap
gradio
joblib
matplotlib

Overwriting requirements.txt


In [None]:
!pip install -r requirements.txt

In [None]:
%%writefile parser.py

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def prase_transaction_data(file_path):
    """Load a transaction CSV and derive the basic model features.

    Rows containing any missing value are dropped, then three columns are
    added:

    * ``hour`` - hour of day extracted from ``timestamp``
    * ``amount_log`` - ``log1p`` of ``amount``
    * ``is_high_risk_country`` - 1 when ``country`` is Nigeria, Russia or
      China, otherwise 0

    Args:
        file_path: Source passed straight to ``pandas.read_csv``; the CSV must
            contain ``timestamp``, ``amount`` and ``country`` columns.

    Returns:
        pandas.DataFrame: the cleaned frame including the derived columns.

    Note:
        The function name carries a historical typo; it is kept so existing
        callers continue to work. ``parse_transaction_data`` is a correctly
        spelled alias for the same function.
    """
    high_risk_countries = ["Nigeria", "Russia", "China"]

    df = pd.read_csv(file_path).dropna()
    df['hour'] = pd.to_datetime(df['timestamp']).dt.hour
    df['amount_log'] = np.log1p(df['amount'])
    # Vectorised membership test instead of a per-row lambda
    df['is_high_risk_country'] = df['country'].isin(high_risk_countries).astype(int)

    return df


# Correctly spelled alias; the original name stays available for callers.
parse_transaction_data = prase_transaction_data

def preprocess_for_model(df, scaler=None, fit=True):
    """Build the scaled feature matrix (and labels, if present) for the model.

    Selects ``amount_log``, ``hour``, ``is_high_risk_country`` and
    ``merchant_category``, one-hot encodes the category (dropping the first
    level) and scales the result.

    Args:
        df: Frame containing the four feature columns above; an optional
            ``fraud_label`` column is returned as the target.
        scaler: Optional scaler object exposing ``fit_transform`` and
            ``transform``. When omitted a new ``StandardScaler`` is created
            and fitted on ``df`` (the original behaviour). Pass your own
            instance to keep the fitted statistics for later calls.
        fit: When True (default) the scaler is fitted on ``df``; when False
            the already-fitted ``scaler`` is only applied, which is what
            inference-time data needs.

    Returns:
        tuple: ``(X_scaled, y)`` where ``X_scaled`` is the scaler's output and
        ``y`` is the ``fraud_label`` column, or ``None`` if it is absent.

    Raises:
        ValueError: if ``fit=False`` is requested without supplying a scaler.

    Note:
        The one-hot columns are derived from the categories present in ``df``,
        so frames with different category sets produce different layouts.
    """
    features = ['amount_log', 'hour', 'is_high_risk_country', 'merchant_category']
    X = df[features]
    y = df.get('fraud_label', None)

    # One-hot encode category
    X = pd.get_dummies(X, columns=['merchant_category'], drop_first=True)

    # Normalize
    if scaler is None:
        if not fit:
            raise ValueError("fit=False requires an already fitted scaler")
        scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X) if fit else scaler.transform(X)

    return X_scaled, y



Overwriting parser.py


In [None]:
%%writefile fraud_model.py

import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import shap
import joblib

def generate_fraud_dataset(num_samples=10000, seed=None):
    """Generate a synthetic transaction dataset with a binary fraud target.

    Each row gets a log-normal ``amount``, a uniform ``hour`` (0-23), a random
    ``country`` and ``merchant_category``, and a random ``is_weekend`` flag.
    A per-row fraud probability is accumulated from additive risk factors,
    capped at 0.95, and the ``target`` label is drawn from it.

    Args:
        num_samples: Number of rows to generate.
        seed: Optional integer seed. When given, a private
            ``numpy.random.RandomState`` is used so the result is reproducible
            and the global NumPy generator is left untouched. When ``None``
            (default) the global ``np.random`` generator is used, exactly as
            before.

    Returns:
        pandas.DataFrame with columns ``amount``, ``hour``, ``country``,
        ``merchant_category``, ``is_weekend`` and ``target``.
    """
    # Draw order below is unchanged so np.random.seed() callers get the same stream.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Base data
    amount = rng.lognormal(3, 0.5, num_samples)
    hour = rng.randint(0, 24, num_samples)
    country = rng.choice(["US", "Nigeria", "Russia", "China", "UK"], num_samples)
    merchant_category = rng.choice(["Retail", "Health", "Crypto", "Gambling", "Travel"], num_samples)
    is_weekend = rng.choice([0, 1], num_samples)

    fraud_risk = np.zeros(num_samples)

    # NOTE(review): with lognormal(3, 0.5), ln(1000) is ~7.8 sigma above the
    # mean, so this term almost never fires - confirm the threshold.
    fraud_risk += (amount > 1000).astype(float) * 0.3

    high_risk_countries = ["Nigeria", "Russia", "China"]
    fraud_risk += np.isin(country, high_risk_countries).astype(float) * 0.3

    risky_merchants = ["Crypto", "Gambling"]
    fraud_risk += np.isin(merchant_category, risky_merchants).astype(float) * 0.3

    # Transactions between 02:00 and 05:59 carry a small extra risk
    fraud_risk += ((hour >= 2) & (hour <= 5)).astype(float) * 0.1

    # Combine and cap at 0.95
    fraud_risk = np.clip(fraud_risk, 0, 0.95)

    # Generate target: higher fraud_risk -> higher chance of fraud
    target = (rng.rand(num_samples) < fraud_risk).astype(int)

    return pd.DataFrame({
        "amount": amount,
        "hour": hour,
        "country": country,
        "merchant_category": merchant_category,
        "is_weekend": is_weekend,
        "target": target
    })

    # Train models
# NOTE: everything below runs at import time, not only under `python fraud_model.py`.
# Seed NumPy's global generator so the synthetic data, and therefore the
# saved models, are reproducible from run to run.
np.random.seed(42)
df = generate_fraud_dataset()
df['amount_log'] = np.log1p(df['amount'])
df = pd.get_dummies(df, columns=["country", "merchant_category"])
# get_dummies may emit bool columns; cast them to int so all features are numeric
bool_cols = df.select_dtypes(include='bool').columns
df = df.astype({col: int for col in bool_cols})

X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Isolation Forest
iso_forest = IsolationForest(contamination=0.05, random_state=42)
iso_forest.fit(X_train)
joblib.dump(iso_forest, "iso_forest.pkl")

# Train XGBoost (`use_label_encoder` dropped: deprecated, ignored by current releases)
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
xgb.fit(X_train, y_train)

# Save through the sklearn wrapper: app.py loads it with XGBClassifier.load_model.
xgb.save_model("xgb_fraud.json")

# Persist the feature order so inference inputs can be aligned to it
joblib.dump(X_train.columns.tolist(), "train_columns.pkl")

# Evaluate
preds = xgb.predict(X_test)
print(classification_report(y_test, preds))
print(" Models saved: iso_forest.pkl, xgb_fraud.json, train_columns.pkl")



Overwriting fraud_model.py


In [None]:
!ls -l *.pkl

-rw-r--r-- 1 root root 1911161 Jul 31 09:42 iso_forest.pkl
-rw-r--r-- 1 root root     272 Jul 31 09:42 train_columns.pkl
-rw-r--r-- 1 root root 1150093 Jul 31 09:03 xgb_explainer.pkl
-rw-r--r-- 1 root root  350795 Jul 31 09:03 xgb_fraud.pkl


In [None]:
%%writefile app.py

import gradio as gr
import joblib
import pandas as pd
import shap
import xgboost as xgb
from xgboost import XGBClassifier
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os

# Load the artifacts written by fraud_model.py.
# NOTE: assigning to `xgb` rebinds the name created by `import xgboost as xgb`.
try:
    iso_forest = joblib.load("iso_forest.pkl")

    # The classifier is restored from its JSON dump rather than a pickle.
    xgb = XGBClassifier()
    xgb.load_model("xgb_fraud.json")

    # Feature order recorded at training time; inputs are reindexed to it.
    train_cols = joblib.load("train_columns.pkl")
except FileNotFoundError as e:
    raise FileNotFoundError(f"File missing: {e}. Did you run fraud_model.py?")


# One all-zero row with the training columns, given to SHAP as the
# explainer's background data.
_background = pd.DataFrame(
    data=np.zeros((1, len(train_cols))),
    columns=train_cols,
)
explainer = shap.Explainer(xgb, _background)

def predict_fraud(amount, hour, country, merchant_category, is_weekend):
    """Score one transaction and build a SHAP explanation for the decision.

    Reads the module-level ``iso_forest``, ``xgb``, ``explainer`` and
    ``train_cols`` objects.

    Args:
        amount: Transaction amount; must convert to a finite, non-negative float.
        hour: Hour of day; must convert to an int in 0-23.
        country: Country name, one-hot encoded against the training columns.
        merchant_category: Merchant category, one-hot encoded likewise.
        is_weekend: Truthy/falsy flag, converted to 0/1.

    Returns:
        tuple: ``(message, figure)``. ``message`` reports the XGBoost verdict
        and the Isolation Forest ``score_samples`` value; ``figure`` is the
        matplotlib figure holding the SHAP waterfall plot. For invalid input
        the tuple is ``(" Invalid input: Please enter valid numbers.", None)``.
    """
    invalid = (" Invalid input: Please enter valid numbers.", None)

    try:
        amount = float(amount)
        hour = int(hour)
        is_weekend = int(is_weekend)
    except (TypeError, ValueError, OverflowError):
        # TypeError is what float()/int() raise for None (e.g. a cleared field)
        return invalid

    # log1p of a negative or non-finite amount would feed NaN into the models
    if not np.isfinite(amount) or amount < 0 or not 0 <= hour <= 23:
        return invalid

    input_data = pd.DataFrame({
        "amount": [amount],
        "hour": [hour],
        "is_weekend": [is_weekend],
        "country": [country],
        "merchant_category": [merchant_category]
    })

    input_data['amount_log'] = np.log1p(input_data['amount'])
    input_data = pd.get_dummies(input_data, columns=["country", "merchant_category"])
    # Align to the training layout; the float cast avoids a bool/int/float mix
    # where training used purely numeric columns.
    input_data = input_data.reindex(columns=train_cols, fill_value=0).astype(float)

    risk_score = iso_forest.score_samples(input_data)[0]
    prediction = xgb.predict(input_data)[0]

    shap_values = explainer(input_data)
    fig, _ = plt.subplots(figsize=(8, 5))
    shap.plots.waterfall(shap_values[0], max_display=6, show=False)
    plt.tight_layout()
    # Close this specific figure so repeated requests do not accumulate figures
    plt.close(fig)

    if prediction == 1:
        return f" FRAUD DETECTED! Anomaly Score: {risk_score:.3f}", fig
    else:
        return f" No Fraud. Anomaly Score: {risk_score:.3f}", fig


# Gradio UI: a row of input widgets, a status box, a SHAP plot and one button.
with gr.Blocks(title="FraudGuard", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    #  FraudGuard  Real-Time Transaction Fraud Detector
    Enter transaction details below. FraudGuard uses AI to detect and **explain** fraud risk.
    """)

    # Inputs, in the order predict_fraud expects them
    with gr.Row():
        amount = gr.Number(value=100.0, label="Transaction Amount ($)")
        hour = gr.Slider(0, 23, value=14, step=1, label="Hour of Day")
        country = gr.Dropdown(
            ["US", "Nigeria", "Russia", "China", "UK"],
            value="US",
            label="Country",
        )
        merchant_category = gr.Dropdown(
            ["Retail", "Health", "Crypto", "Gambling", "Travel"],
            value="Retail",
            label="Merchant Category",
        )
        is_weekend = gr.Checkbox(label="Is Weekend?")

    # Outputs: textual verdict plus the explanation figure
    output = gr.Textbox(label="Risk Status")
    explanation = gr.Plot(label="Why This Decision? (SHAP Explanation)")

    submit_btn = gr.Button(" Analyze Transaction")
    submit_btn.click(
        fn=predict_fraud,
        inputs=[amount, hour, country, merchant_category, is_weekend],
        outputs=[output, explanation],
    )

# Start the server only when run as a script; share=True requests a public link.
if __name__ == "__main__":
    demo.launch(share=True)




Overwriting app.py


In [None]:
import shutil
import os

DRIVE_ROOT = "/content/drive/MyDrive"
PROJECT_DIR = os.path.join(DRIVE_ROOT, "FraudGuard_Project")

# If the Drive folder is absent, makedirs would quietly create this path on
# the local disk and the files would not reach Drive at all.
if not os.path.isdir(DRIVE_ROOT):
    raise FileNotFoundError(
        f"{DRIVE_ROOT} not found - mount Google Drive before running this cell."
    )

os.makedirs(PROJECT_DIR, exist_ok=True)
for filename in ("parser.py", "fraud_model.py", "app.py"):
    shutil.copy(filename, os.path.join(PROJECT_DIR, filename))

print("All things are safe")

All things are safe


In [None]:
# Runs app.py as a script, which starts the Gradio server with a public share
# link. The cell stays busy until the server is interrupted.
!python app.py

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://855058e9f4b138d96a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
Keyboard interruption in main thread... closing server.
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 3107, in block_thread
    time.sleep(0.1)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/content/app.py", line 91, in <module>
    demo.launch(share=True)
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 3013, in launch
    self.block_thread()
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 3111, in block_thread
    self.server.close()
  File "/usr/local/lib/python3.11/dist-packages