# 💳 Credit Card Fraud Detection Dashboard

🧪 You can try the app live here:  
🔗 [https://mderouiche7-credit-fraud.hf.space](https://mderouiche7-credit-fraud.hf.space)

📊 Built with Gradio, XGBoost, SHAP.  
👤 Author: [Mohamed Derouiche](https://www.linkedin.com/in/mohamed-derouiche-ba1843294)


In [14]:
import numpy as np
import pandas as pd
import joblib
import shap
import matplotlib.pyplot as plt
import gradio as gr
from sklearn.model_selection import train_test_split


In [15]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
from google.colab import files
files.upload()  # upload scaler.pkl from your computer

Saving scaler.pkl to scaler (3).pkl


{'scaler (3).pkl': b'\x80\x04\x95=\x01\x00\x00\x00\x00\x00\x00\x8c\x1bsklearn.preprocessing._data\x94\x8c\x0eStandardScaler\x94\x93\x94)\x81\x94}\x94(\x8c\twith_mean\x94\x88\x8c\x08with_std\x94\x88\x8c\x04copy\x94\x88\x8c\x11feature_names_in_\x94\x8c\x13joblib.numpy_pickle\x94\x8c\x11NumpyArrayWrapper\x94\x93\x94)\x81\x94}\x94(\x8c\x08subclass\x94\x8c\x05numpy\x94\x8c\x07ndarray\x94\x93\x94\x8c\x05shape\x94K\x02\x85\x94\x8c\x05order\x94\x8c\x01C\x94\x8c\x05dtype\x94h\x0f\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b\x8c\nallow_mmap\x94\x89\x8c\x1bnumpy_array_alignment_bytes\x94K\x10ub\x80\x05\x95\x9a\x00\x00\x00\x00\x00\x00\x00\x8c\x16numpy._core.multiarray\x94\x8c\x0c_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\x07ndarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x02\x85\x94h\x03\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK

In [17]:
#Uploasing model XGBoost best performer and the scaler:
model = joblib.load("/content/drive/MyDrive/models/xgboost_model.pkl")
scaler = joblib.load("scaler.pkl")

In [18]:
import kagglehub
# Download dataset from Kaggle
path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")
# Load the dataset using the full path
df = pd.read_csv(f"{path}/creditcard.csv")

# Split features and target
X = df.drop("Class", axis=1)
y = df["Class"]

# Split train/test (use same random_state as training for consistency)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Now, instead of fitting scaler again, just transform Time and Amount using loaded scaler:
X_test_scaled = X_test.copy()
X_test_scaled[['Time', 'Amount']] = scaler.transform(X_test_scaled[['Time', 'Amount']])

# Now X_test_scaled is ready for predictions or SHAP analysis

In [22]:

# Load model and scaler (adjust paths)

explainer = shap.TreeExplainer(model)

# Feature names (exclude target)
feature_names = [
    "Time", "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8",
    "V9", "V10", "V11", "V12", "V13", "V14", "V15", "V16",
    "V17", "V18", "V19", "V20", "V21", "V22", "V23", "V24",
    "V25", "V26", "V27", "V28", "Amount"
]

# Define UI inputs with sliders with step=0.01 for decimal precision
def get_inputs():
    return [
        gr.Slider(minimum=0, maximum=172792, step=1, value=50000, label="Time (seconds)"),
        *[gr.Slider(minimum=-30.0, maximum=30.0, step=0.01, value=0.0, label=feature) for feature in feature_names[1:-1]],
        gr.Slider(minimum=0.0, maximum=2500.0, step=0.01, value=50.0, label="Amount (Euros)")
    ]

# Prediction + SHAP function with input validation (accept dot decimals only)
def predict_with_shap(*inputs):
    try:
        # Convert inputs to float, replace comma with dot if any (reject commas by error)
        parsed_inputs = [float(str(x).replace(',', '.')) for x in inputs]
        X = np.array(parsed_inputs).reshape(1, -1)

        # Extract Time and Amount columns and scale using column names
        scaled_df = pd.DataFrame(X[:, [0, -1]], columns=['Time', 'Amount'])
        scaled_values = scaler.transform(scaled_df)

        # Put scaled values back in X
        X[:, 0] = scaled_values[:, 0]  # Scaled Time
        X[:, -1] = scaled_values[:, 1]  # Scaled Amount


        # Predict fraud probability
        pred_prob = model.predict_proba(X)[0][1]

        # Compute SHAP values
        shap_values = explainer.shap_values(X)

       # Plot SHAP bar summary
        plt.figure(figsize=(10, 5))
        shap.summary_plot(shap_values, X, feature_names=feature_names, plot_type="bar", show=False)
        plt.tight_layout()
        plt.savefig("shap_force_plot.png")
        plt.close()

        return f"Fraud Probability: {pred_prob:.4f}", "shap_force_plot.png"

    except ValueError:
        return "Error: Please enter decimals using dots (e.g. 6.5), not commas (6,5).", None
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}", None

# Batch prediction for uploaded CSV
def batch_predict(file):
    try:
        if not file.name.endswith(".csv"):
            return "Error: Please upload a valid .csv file."

        df = pd.read_csv(file.name)
        X = df[feature_names]

        # Scale Time and Amount
        X[['Time', 'Amount']] = scaler.transform(X[['Time', 'Amount']])
        probs = model.predict_proba(X)[:, 1]
        df['Fraud_Probability'] = probs

        output_path = f"batch_predictions_{uuid.uuid4().hex[:6]}.csv"
        df.to_csv(output_path, index=False)
        return output_path

    except Exception as e:
        return f"Error processing file: {str(e)}"

# Footer and project links

description = """
Provide transaction features to estimate fraud probability using a pre-trained ML model.
SHAP explainability will highlight the most influential features.

**Input Guide:**
- `Time`: Seconds since the dataset's first transaction (range: 0–172792)
- `Amount`: Transaction amount in Euros (range: 0–2500)
- `V1–V28`: Anonymized PCA components (original features hidden)

⚠️ Use dot-decimals (e.g., 12.5) — do NOT use commas (e.g., 12,5).
"""



# Build Gradio app with two tabs (single and batch)
single_demo = gr.Interface(
    fn=predict_with_shap,
    inputs=get_inputs(),
    outputs=["text", "image"],
    title="Credit Card Fraud Detection Dashboard",
    description=description,
    article=f"""
    <hr>
    <p style='text-align:center;'>
    <em>Built by Mohamed Derouiche &mdash;
    <a href='https://github.com/mderouiche7' target='_blank'>GitHub</a> |
    <a href='https://www.linkedin.com/in/mohamed-derouiche-ba1843294' target='_blank'>LinkedIn</a> </em>
    </p>
    """
)


# Batch prediction interface
batch_demo = gr.Interface(
    fn=batch_predict,
    inputs=gr.File(label="Upload CSV with features"),
    outputs=gr.File(label="Download Predictions CSV"),
    title="Batch Fraud Prediction"
)

# Combine into a tabbed interface
tabs = gr.TabbedInterface(
    interface_list=[single_demo, batch_demo],
    tab_names=["Single Prediction", "Batch Prediction"]
)
if __name__ == "__main__":
    tabs.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://29a413508c9dcc1423.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
