# Fraud Detection Dashboard
# Author: K. Shashi Preetham
# Description: Interactive Streamlit dashboard to predict and analyze fraudulent financial transactions.

In [2]:
!pip install streamlit plotly shap lightgbm joblib


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m74.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m88.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.50.0


In [7]:
import os

# Create the directory for the model artifacts unless it is already there.
if not os.path.isdir("model"):
    os.makedirs("model", exist_ok=True)


In [9]:
# List the contents of the model directory to check which artifact files are present.
!ls model


fraud_model_slim.pkl  metadata.json


In [12]:

import streamlit as st
import pandas as pd
import numpy as np
import joblib
import json
import plotly.express as px
import shap
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier
import os

In [13]:
# Configure the browser-tab title and switch the page to the wide layout.
st.set_page_config(
    layout="wide",
    page_title="Fraud Detection Dashboard",
)

# Landing section: dashboard title followed by a short overview of its capabilities.
intro_text = """
This interactive dashboard allows users to:
- Analyze and visualize transaction data.
- Make live fraud predictions using a trained LightGBM model.
- Explore feature importance and SHAP-based explainability.
"""
st.title("Fraud Detection Dashboard")
st.markdown(intro_text)



DeltaGenerator()

In [14]:
@st.cache_resource
def load_model():
    """Load the trained model and its metadata from the ``model`` directory.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse the
    loaded objects instead of reading them from disk on every rerun.

    Returns:
        tuple: ``(model, metadata)`` where ``model`` is the object stored in
        ``model/fraud_model_slim.pkl`` and ``metadata`` is the parsed content
        of ``model/metadata.json``.

    If either file is missing, an error message is shown and the script is
    halted via ``st.stop()``.
    """
    model_path = os.path.join("model", "fraud_model_slim.pkl")
    metadata_path = os.path.join("model", "metadata.json")

    if not os.path.exists(model_path) or not os.path.exists(metadata_path):
        st.error("Model files not found. Please ensure both `.pkl` and `.json` files are in the /model folder.")
        st.stop()

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model artifacts that come from a trusted source.
    model = joblib.load(model_path)

    # Read JSON as UTF-8 explicitly rather than relying on the platform default encoding.
    with open(metadata_path, "r", encoding="utf-8") as f:
        metadata = json.load(f)

    return model, metadata

# Show a spinner while the (cached) loader fetches the model and its metadata.
with st.spinner("Loading model..."):
    model, metadata = load_model()

# Fail with a readable message instead of a raw KeyError when the metadata file
# lacks one of the entries the rest of the dashboard relies on.
missing_keys = [key for key in ("feature_cols", "best_threshold") if key not in metadata]
if missing_keys:
    st.error(f"metadata.json is missing required keys: {', '.join(missing_keys)}")
    st.stop()

feature_cols = metadata["feature_cols"]   # feature names listed in the metadata file
threshold = metadata["best_threshold"]    # probability cut-off used to label a transaction as fraud
st.success("Model and metadata loaded successfully.")





DeltaGenerator()

In [15]:
st.header("Enter Transaction Details")

col1, col2, col3 = st.columns(3)

# First column: transaction amount and origin-account balances.
amount = col1.number_input("Transaction Amount", min_value=0.0, value=5000.0)
oldbalanceOrg = col1.number_input("Old Balance (Origin)", min_value=0.0, value=15000.0)
newbalanceOrig = col1.number_input("New Balance (Origin)", min_value=0.0, value=10000.0)

# Second column: destination-account balances and the transaction type.
oldbalanceDest = col2.number_input("Old Balance (Destination)", min_value=0.0, value=5000.0)
newbalanceDest = col2.number_input("New Balance (Destination)", min_value=0.0, value=8000.0)
type_code = col2.selectbox("Transaction Type", ["PAYMENT", "TRANSFER", "CASH_OUT", "DEBIT"])

# Third column: usage hint.
col3.markdown("### Tip:")
col3.info("Adjust the fields to simulate different transaction patterns and test model predictions.")

# Map the selected transaction type to its integer code (falls back to 0 for unknown labels).
type_map = {
    "PAYMENT": 0,
    "TRANSFER": 1,
    "CASH_OUT": 2,
    "DEBIT": 3,
}
type_encoded = type_map.get(type_code, 0)

# Derived features: balance changes on both accounts, log-scaled amount,
# and the amount relative to the origin balance.
delta_orig = oldbalanceOrg - newbalanceOrig
delta_dest = newbalanceDest - oldbalanceDest
amt_log = np.log1p(amount)
if oldbalanceOrg > 0:
    trans_ratio = amount / (oldbalanceOrg + 1e-5)
else:
    trans_ratio = 0

# Assemble the single-row frame that is handed to the model.
feature_row = {
    "amount": amount,
    "oldbalanceOrg": oldbalanceOrg,
    "newbalanceOrig": newbalanceOrig,
    "oldbalanceDest": oldbalanceDest,
    "newbalanceDest": newbalanceDest,
    "delta_orig": delta_orig,
    "delta_dest": delta_dest,
    "amt_log": amt_log,
    "trans_ratio": trans_ratio,
    "type_code": type_encoded,
}
input_data = pd.DataFrame([feature_row])




In [16]:
st.header("Model Prediction")

if st.button("Run Prediction"):
    # The model must receive exactly the columns named in the metadata, in that order.
    missing_features = [col for col in feature_cols if col not in input_data.columns]

    if missing_features:
        st.error(f"Input is missing features expected by the model: {', '.join(missing_features)}")
    else:
        # Align the input with the metadata's feature order; this also keeps the
        # SHAP table below correctly labelled.
        model_input = input_data[feature_cols]

        # Column 1 of predict_proba is the probability of the positive (fraud) class.
        prob = model.predict_proba(model_input)[0, 1]
        prediction = "Fraudulent Transaction" if prob >= threshold else "Legitimate Transaction"

        st.metric(label="Fraud Probability", value=f"{prob*100:.2f}%")
        st.subheader(f"Prediction: {prediction}")

        # -------------------------------
        # SHAP Explainability
        # -------------------------------
        st.markdown("### Feature Impact on Prediction")
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(model_input)

        # Depending on the SHAP version, a binary classifier yields either a list with one
        # array per class, a 3-D array (samples, features, classes), or a single 2-D array.
        # Always explain the positive (fraud) class so the signs match the shown probability.
        if isinstance(shap_values, list):
            shap_values = shap_values[-1]
        shap_values = np.asarray(shap_values)
        if shap_values.ndim == 3:
            shap_values = shap_values[:, :, -1]

        shap_df = pd.DataFrame({
            "Feature": list(feature_cols),
            "SHAP Value": shap_values[0],
            "Feature Value": model_input.values[0]
        }).sort_values(by="SHAP Value", ascending=False)

        fig = px.bar(
            shap_df,
            x="SHAP Value",
            y="Feature",
            orientation="h",
            color="SHAP Value",
            color_continuous_scale="RdBu",
            title="Feature Contributions to Prediction"
        )
        st.plotly_chart(fig, use_container_width=True)

# -------------------------------
# Visualization Section (Optional)
# -------------------------------
st.header("Data Insights and Visualization")

st.markdown("""
Explore interactive and animated visualizations from sample transaction data.
If you upload a CSV file, the dashboard will automatically generate insights.
""")

uploaded_file = st.file_uploader("Upload transaction CSV file (optional)", type=["csv"])

if uploaded_file:
    # Report unreadable uploads in the UI instead of crashing the page.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        df = None
        st.error(f"Could not read the uploaded CSV file: {exc}")

    if df is not None:
        # Every chart below needs the transaction type and the fraud label.
        missing_cols = [col for col in ("type", "isFraud") if col not in df.columns]

        if missing_cols:
            st.error(f"Uploaded CSV is missing required columns: {', '.join(missing_cols)}")
        else:
            # Fraud distribution
            fig1 = px.histogram(df, x="type", color="isFraud", barmode="group",
                                title="Fraud vs Non-Fraud by Transaction Type",
                                color_discrete_map={0: "green", 1: "red"})
            st.plotly_chart(fig1, use_container_width=True)

            # Animated transaction amount distribution (needs both the time step and the amount)
            if "step" in df.columns and "amount" in df.columns:
                fig2 = px.histogram(df[df["amount"] < 50000], x="amount", color="isFraud",
                                    nbins=50, animation_frame="step",
                                    title="Transaction Amount Distribution Over Time",
                                    color_discrete_map={0: "lightblue", 1: "red"})
                st.plotly_chart(fig2, use_container_width=True)

            # Fraud ratio by transaction type, expressed as a percentage
            fraud_rate = df.groupby("type")["isFraud"].mean().reset_index()
            fraud_rate["isFraud"] = fraud_rate["isFraud"] * 100
            fig3 = px.bar(fraud_rate, x="type", y="isFraud", text="isFraud",
                          title="Fraud Rate (%) by Transaction Type",
                          color="isFraud", color_continuous_scale="Reds")
            fig3.update_traces(texttemplate="%{text:.2f}%", textposition="outside")
            st.plotly_chart(fig3, use_container_width=True)

else:
    st.info("Upload a CSV file to explore data visualizations.")

# -------------------------------
# Footer
# -------------------------------
footer_text = "Developed by K. Shashi Preetham | Powered by Streamlit & LightGBM"

st.markdown("---")  # horizontal rule separating the footer from the page content
st.caption(footer_text)



DeltaGenerator()