In [1]:
!pip install streamlit pyngrok



In [2]:
import kagglehub
import os

# Download the Kaggle dataset through kagglehub and keep the directory it reports.
path = kagglehub.dataset_download("kartik2112/fraud-detection")
print("Path to dataset files:", path)
# List the directory so the CSV file names used by the app can be confirmed.
print(os.listdir(path))

Path to dataset files: /kaggle/input/fraud-detection
['fraudTest.csv', 'fraudTrain.csv']


In [6]:
%%writefile simulator_stream.py
from pyngrok import ngrok
import kagglehub
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import time
import numpy as np
from datetime import datetime, timedelta
import joblib
import warnings
from xgboost import XGBClassifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV

warnings.filterwarnings('ignore')

# Page configuration.
# Kept ahead of every other `st.*` call in this script so that no Streamlit
# command runs before the page is configured.
st.set_page_config(
    page_title="SecureBank Fraud Detection System",
    page_icon="🏦",
    layout="wide",
    initial_sidebar_state="collapsed"
)


@st.cache_data(show_spinner=False)
def _resolve_dataset_dir():
    """Return the local directory holding the Kaggle fraud-detection dataset.

    Streamlit re-executes this script from the top on every rerun, and the
    simulator reruns once per displayed transaction. Caching the result keeps
    the kagglehub call from being repeated on each of those reruns.
    """
    return kagglehub.dataset_download("kartik2112/fraud-detection")


# Module-level name read by load_fraud_data().
path = _resolve_dataset_dir()


# NOTE: swap in a different model file below if needed.

# Load the trained model
@st.cache_resource
def load_model():
    """Load the trained XGBoost model from disk.

    Returns the object produced by ``joblib.load`` and shows a success banner.
    If anything in that step raises, a warning banner carrying the error text
    is shown instead and ``None`` is returned.
    """
    model_file = "best_xgb_model_macroF1_thresh0.9.pkl"
    try:
        loaded = joblib.load(model_file)
        st.success("✅ ML Model loaded successfully!")
    except Exception as load_error:
        st.warning(f"⚠️ Could not load model: {load_error}. Using rule-based scoring.")
        return None
    return loaded


# Load model (None when the pickle could not be read).
# NOTE(review): `xgb_model` is not referenced by the scoring code visible in
# this script; the transaction-processing code calls the rule-based
# calculate_fraud_score() instead.
xgb_model = load_model()

# Custom CSS for professional banking interface.
# Injected as one raw <style> element (hence unsafe_allow_html=True). It pulls
# the Inter font from Google Fonts and defines the classes referenced by the
# HTML snippets emitted further down: .main-header, .status-indicator, .pulse,
# .metric-card / .metric-label / .metric-value / .metric-delta, and .alert-box
# with its .alert-critical / .alert-high / .alert-medium / .alert-low variants.
st.markdown("""
<style>
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

    * {
        font-family: 'Inter', sans-serif;
    }

    .main {
        background-color: #f8f9fa;
    }

    .stApp {
        background-color: #f8f9fa;
    }

    /* Header styling */
    .main-header {
        background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
        color: white;
        padding: 20px;
        border-radius: 10px;
        margin-bottom: 20px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    }

    .main-header h1 {
        margin: 0;
        font-weight: 700;
        font-size: 32px;
    }

    .main-header p {
        margin: 5px 0 0 0;
        opacity: 0.9;
        font-size: 16px;
    }

    /* Alert boxes */
    .alert-container {
        position: fixed;
        top: 20px;
        right: 20px;
        z-index: 999;
        max-width: 400px;
    }

    .alert-box {
        padding: 16px;
        border-radius: 8px;
        margin-bottom: 10px;
        box-shadow: 0 4px 12px rgba(0,0,0,0.15);
        animation: slideIn 0.5s ease-out;
        position: relative;
        overflow: hidden;
    }

    @keyframes slideIn {
        from {
            transform: translateX(100%);
            opacity: 0;
        }
        to {
            transform: translateX(0);
            opacity: 1;
        }
    }

    .alert-critical {
        background: linear-gradient(135deg, #ff4757 0%, #ff3838 100%);
        color: white;
        border-left: 4px solid #c92a2a;
    }

    .alert-high {
        background: linear-gradient(135deg, #ff6348 0%, #ff4757 100%);
        color: white;
        border-left: 4px solid #e03131;
    }

    .alert-medium {
        background: linear-gradient(135deg, #ffa502 0%, #ff7675 100%);
        color: white;
        border-left: 4px solid #f59f00;
    }

    .alert-low {
        background: linear-gradient(135deg, #f1c40f 0%, #f39c12 100%);
        color: white;
        border-left: 4px solid #f08c00;
    }

    /* Metric cards */
    .metric-card {
        background: white;
        border-radius: 12px;
        padding: 24px;
        box-shadow: 0 2px 8px rgba(0,0,0,0.08);
        height: 100%;
        transition: transform 0.2s ease, box-shadow 0.2s ease;
    }

    .metric-card:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 12px rgba(0,0,0,0.12);
    }

    .metric-value {
        font-size: 36px;
        font-weight: 700;
        margin: 8px 0;
    }

    .metric-label {
        font-size: 14px;
        color: #64748b;
        font-weight: 500;
        text-transform: uppercase;
        letter-spacing: 0.5px;
    }

    .metric-delta {
        font-size: 14px;
        font-weight: 600;
        display: inline-flex;
        align-items: center;
        padding: 4px 8px;
        border-radius: 4px;
        margin-top: 8px;
    }

    .metric-delta.positive {
        color: #10b981;
        background: #d1fae5;
    }

    .metric-delta.negative {
        color: #ef4444;
        background: #fee2e2;
    }

    /* Status indicator */
    .status-indicator {
        display: inline-flex;
        align-items: center;
        padding: 6px 12px;
        border-radius: 20px;
        font-size: 14px;
        font-weight: 600;
    }

    .status-active {
        background: #d1fae5;
        color: #065f46;
    }

    .status-inactive {
        background: #e5e7eb;
        color: #4b5563;
    }

    /* Pulse animation for live indicator */
    .pulse {
        display: inline-block;
        width: 8px;
        height: 8px;
        border-radius: 50%;
        background: #10b981;
        margin-right: 8px;
        animation: pulse 2s infinite;
    }

    @keyframes pulse {
        0% {
            box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.4);
        }
        70% {
            box-shadow: 0 0 0 10px rgba(16, 185, 129, 0);
        }
        100% {
            box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
        }
    }

    /* Transaction table styling */
    .transaction-row {
        transition: background-color 0.3s ease;
    }

    .transaction-row:hover {
        background-color: #f8f9fa;
    }

    .fraud-badge {
        display: inline-block;
        padding: 4px 8px;
        border-radius: 4px;
        font-size: 12px;
        font-weight: 600;
        text-transform: uppercase;
    }

    .fraud-badge.critical {
        background: #ff4757;
        color: white;
    }

    .fraud-badge.high {
        background: #ff6348;
        color: white;
    }

    .fraud-badge.medium {
        background: #ffa502;
        color: white;
    }

    .fraud-badge.low {
        background: #f1c40f;
        color: white;
    }

    .fraud-badge.safe {
        background: #10b981;
        color: white;
    }
</style>
""", unsafe_allow_html=True)

# Initialize session state: every key that is still missing gets a fresh
# default built by its factory (bool() -> False, int() -> 0, list() -> [],
# dict() -> {}). Keys that already exist are left untouched.
for _state_key, _default_factory in (
    ('is_running', bool),
    ('current_row', int),
    ('transaction_history', list),
    ('customer_profiles', dict),
    ('alert_log', list),
    ('total_processed', int),
    ('total_fraudulent', int),
    ('total_amount_flagged', int),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_factory()


# Load data
@st.cache_data
def load_fraud_data():
    """Read fraudTest.csv from the dataset directory and parse its timestamp column.

    Returns the resulting DataFrame with ``trans_date_trans_time`` converted
    by ``pd.to_datetime``.
    """
    timestamp_col = 'trans_date_trans_time'
    transactions = pd.read_csv(f'{path}/fraudTest.csv')
    transactions[timestamp_col] = pd.to_datetime(transactions[timestamp_col])
    return transactions


def calculate_fraud_score(transaction, customer_profile):
    """Calculate a rule-based fraud score in the range 0-100.

    Args:
        transaction: mapping with ``amt``, ``category``,
            ``trans_date_trans_time`` (needs ``.hour`` and subtraction that
            yields an object with ``total_seconds()``) and optionally ``city``.
        customer_profile: mapping with ``avg_amount``, ``transaction_count``,
            ``last_transaction_time``, ``common_categories`` and
            ``common_cities``.

    Returns:
        The summed rule points plus a small random jitter, clamped to [0, 100].
    """
    score = 0

    # Amount-based scoring: how large is this amount relative to the average?
    average_amount = customer_profile['avg_amount']
    if average_amount > 0:
        amount_ratio = transaction['amt'] / average_amount
        if amount_ratio > 5:
            score += 40
        elif amount_ratio > 3:
            score += 25
        elif amount_ratio > 2:
            score += 15

    # Frequency-based scoring: only for cards with more than 10 transactions.
    if customer_profile['transaction_count'] > 10:
        last_seen = customer_profile['last_transaction_time']
        if last_seen:
            minutes_since_last = (transaction['trans_date_trans_time'] -
                                  last_seen).total_seconds() / 60
            if minutes_since_last < 5:  # Less than 5 minutes
                score += 20

    # Category-based scoring. Guarded on a non-empty history, like the city
    # rule below: with nothing recorded yet there is no baseline to deviate
    # from, so an empty profile must not be penalised for a "new" category.
    known_categories = customer_profile['common_categories']
    if known_categories and transaction['category'] not in known_categories:
        score += 15

    # Location-based scoring (if available)
    if 'city' in transaction and customer_profile['common_cities']:
        if transaction['city'] not in customer_profile['common_cities']:
            score += 20

    # Time-based scoring
    hour = transaction['trans_date_trans_time'].hour
    if hour < 6 or hour > 22:  # Unusual hours
        score += 10

    # Add some randomness for demo (integer jitter in [-5, 9])
    score += np.random.randint(-5, 10)

    return min(100, max(0, score))


def get_risk_level(score):
    """Map a score to a ``(risk label, CSS class)`` pair.

    Tiers are tested from the highest floor down; the first floor the score
    reaches wins. Scores below every floor are ``("SAFE", "")``.
    """
    tiers = (
        (80, "CRITICAL", "alert-critical"),
        (60, "HIGH", "alert-high"),
        (15, "MEDIUM", "alert-medium"),
        (8, "LOW", "alert-low"),
    )
    for floor, label, css_class in tiers:
        if score >= floor:
            return label, css_class
    return "SAFE", ""


def update_customer_profile(transaction, profiles):
    """Fold one transaction into the per-card profile and return that profile.

    ``profiles`` is keyed by ``transaction['cc_num']`` and is mutated in
    place; a card seen for the first time gets a zeroed profile first.
    """
    entry = profiles.setdefault(transaction['cc_num'], {
        'transaction_count': 0,
        'total_amount': 0,
        'avg_amount': 0,
        'common_categories': set(),
        'common_cities': set(),
        'last_transaction_time': None,
        'fraud_count': 0
    })

    # Running totals first, then the average derived from them.
    entry['transaction_count'] = entry['transaction_count'] + 1
    entry['total_amount'] = entry['total_amount'] + transaction['amt']
    entry['avg_amount'] = entry['total_amount'] / entry['transaction_count']

    # Remember where and on what the card has been used.
    entry['common_categories'].add(transaction['category'])
    if 'city' in transaction:
        entry['common_cities'].add(transaction['city'])

    entry['last_transaction_time'] = transaction['trans_date_trans_time']
    return entry


# Header: page banner emitted as raw HTML; it uses the "main-header" class.
st.markdown("""
<div class="main-header">
    <h1>🏦 SecureBank Fraud Detection System</h1>
    <p>Real-time Transaction Monitoring & Anomaly Detection</p>
</div>
""", unsafe_allow_html=True)

# Status bar: a centred pill in the wider middle column of a 1:2:1 layout.
# Which pill is shown depends on st.session_state.is_running.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    if st.session_state.is_running:
        # Active state: includes the "pulse" dot element.
        st.markdown("""
        <div style="text-align: center;">
            <span class="status-indicator status-active">
                <span class="pulse"></span>
                SYSTEM ACTIVE - MONITORING TRANSACTIONS
            </span>
        </div>
        """, unsafe_allow_html=True)
    else:
        # Inactive state: same pill without the pulse dot.
        st.markdown("""
        <div style="text-align: center;">
            <span class="status-indicator status-inactive">
                SYSTEM INACTIVE
            </span>
        </div>
        """, unsafe_allow_html=True)

# Control panel: Start / Pause / Reset buttons plus the speed and sensitivity sliders.
st.markdown("<br>", unsafe_allow_html=True)
col1, col2, col3, col4, col5 = st.columns(5)

with col1:
    # Start only flips the running flag, so Pause -> Start resumes at the
    # current row. Rewinding to row 0 here without also clearing the counters,
    # history and profiles would process (and count) earlier transactions a
    # second time; a full rewind is what the Reset button is for.
    if st.button("▶️ Start Monitoring", disabled=st.session_state.is_running, use_container_width=True):
        st.session_state.is_running = True
        st.rerun()

with col2:
    if st.button("⏸️ Pause", disabled=not st.session_state.is_running, use_container_width=True):
        st.session_state.is_running = False
        st.rerun()

with col3:
    # Reset stops monitoring and returns every piece of simulation state to
    # its initial value.
    if st.button("🔄 Reset", use_container_width=True):
        st.session_state.is_running = False
        st.session_state.current_row = 0
        st.session_state.transaction_history = []
        st.session_state.customer_profiles = {}
        st.session_state.alert_log = []
        st.session_state.total_processed = 0
        st.session_state.total_fraudulent = 0
        st.session_state.total_amount_flagged = 0
        st.rerun()

with col4:
    # Value is passed to time.sleep() between transactions (seconds).
    speed = st.select_slider("Speed", options=[0.1, 0.3, 0.5, 1.0, 2.0], value=0.5)

with col5:
    # "High" scales scores up and "Low" scales them down in the processing step.
    threshold = st.select_slider("Sensitivity", options=["Low", "Medium", "High"], value="Medium")

# Metrics dashboard: one container per column, filled in by the processing code.
st.markdown("<br>", unsafe_allow_html=True)
metrics_col1, metrics_col2, metrics_col3, metrics_col4 = st.columns(4)
metric_container1 = metrics_col1.container()
metric_container2 = metrics_col2.container()
metric_container3 = metrics_col3.container()
metric_container4 = metrics_col4.container()

# Alert panel (narrow, left) and main chart (wide, right)
st.markdown("<br>", unsafe_allow_html=True)
alert_col, chart_col = st.columns([1, 2])

alert_col.markdown("### 🚨 Recent Alerts")
alert_container = alert_col.container()

chart_col.markdown("### 📊 Transaction Flow Analysis")
chart_container = chart_col.empty()

# Transaction table
st.markdown("<br>", unsafe_allow_html=True)
st.markdown("### 📋 Live Transaction Feed")
table_container = st.empty()

# Load data (served from the st.cache_data cache after the first run)
df = load_fraud_data()

# Main processing step
#
# Streamlit re-executes this script from the top after every st.rerun(), so a
# single run handles at most one transaction: score it, redraw the dashboard
# from session state, sleep, and rerun. The dashboard is drawn on every run
# (not only while monitoring is active) so that pausing keeps the last state
# on screen instead of blanking it.

# Colour per risk level, shared by the bar chart and the table styling.
RISK_COLORS = {
    "CRITICAL": '#ff4757',
    "HIGH": '#ff6348',
    "MEDIUM": '#ffa502',
    "LOW": '#f1c40f',
    "SAFE": '#10b981',
}
ALERT_SCORE_THRESHOLD = 40  # scores at or above this count as suspicious and raise an alert
MAX_HISTORY = 100           # transactions kept for the chart and the table
MAX_ALERTS = 10             # alerts kept in the log


def _process_transaction(transaction, sensitivity):
    """Score one transaction and fold it into st.session_state.

    ``transaction`` is a dict for one dataset row; it is extended in place
    with ``fraud_score`` and ``risk_level``. ``sensitivity`` is the slider
    value ("Low", "Medium" or "High").
    """
    state = st.session_state
    profiles = state.customer_profiles

    # Score against the history recorded BEFORE this transaction. Updating the
    # profile first would put the current category, city and timestamp into
    # it, so the novelty checks could never fire and the time gap would
    # always be zero.
    prior_profile = profiles.get(transaction['cc_num'])
    if prior_profile is None:
        # Card not seen yet: score against an empty history.
        prior_profile = {
            'transaction_count': 0,
            'avg_amount': 0,
            'common_categories': set(),
            'common_cities': set(),
            'last_transaction_time': None,
        }
    # NOTE: scoring is rule-based; the commented-out alternative
    # predict_fraud_probability(transaction) is not defined in this script.
    fraud_score = calculate_fraud_score(transaction, prior_profile)
    update_customer_profile(transaction, profiles)

    # Adjust score based on sensitivity; rounded so the UI does not show
    # floating-point noise such as 28.799999999999997.
    if sensitivity == "High":
        fraud_score = round(min(100, fraud_score * 1.2), 1)
    elif sensitivity == "Low":
        fraud_score = round(max(0, fraud_score * 0.8), 1)

    # Classify AFTER the adjustment so the label matches the displayed score.
    risk_level, _risk_class = get_risk_level(fraud_score)

    # Update transaction with score
    transaction['fraud_score'] = fraud_score
    transaction['risk_level'] = risk_level

    # Add to history (bounded)
    state.transaction_history.append(transaction)
    if len(state.transaction_history) > MAX_HISTORY:
        state.transaction_history.pop(0)

    # Update metrics
    state.total_processed += 1
    if fraud_score >= ALERT_SCORE_THRESHOLD:
        state.total_fraudulent += 1
        state.total_amount_flagged += transaction['amt']

        # Add to alert log. cc_num is converted with str() before slicing:
        # slicing the raw value fails when the column is numeric.
        state.alert_log.append({
            'time': transaction['trans_date_trans_time'],
            'customer': str(transaction['cc_num'])[-4:],
            'amount': transaction['amt'],
            'risk_level': risk_level,
            'score': fraud_score,
            'category': transaction['category']
        })
        if len(state.alert_log) > MAX_ALERTS:
            state.alert_log.pop(0)


def _render_metrics():
    """Draw the four metric cards from the session counters."""
    state = st.session_state
    fraud_rate = (state.total_fraudulent / state.total_processed * 100) if state.total_processed > 0 else 0

    with metric_container1:
        st.markdown(f"""
        <div class="metric-card">
            <div class="metric-label">Transactions Processed</div>
            <div class="metric-value">{state.total_processed:,}</div>
            <div class="metric-delta positive">↑ Active Monitoring</div>
        </div>
        """, unsafe_allow_html=True)

    with metric_container2:
        st.markdown(f"""
        <div class="metric-card">
            <div class="metric-label">Suspicious Transactions</div>
            <div class="metric-value">{state.total_fraudulent}</div>
            <div class="metric-delta {'negative' if fraud_rate > 2 else 'positive'}">
                {fraud_rate:.1f}% Detection Rate
            </div>
        </div>
        """, unsafe_allow_html=True)

    with metric_container3:
        st.markdown(f"""
        <div class="metric-card">
            <div class="metric-label">Amount Flagged</div>
            <div class="metric-value">${state.total_amount_flagged:,.0f}</div>
            <div class="metric-delta negative">↑ Under Review</div>
        </div>
        """, unsafe_allow_html=True)

    with metric_container4:
        active_customers = len(state.customer_profiles)
        st.markdown(f"""
        <div class="metric-card">
            <div class="metric-label">Active Customers</div>
            <div class="metric-value">{active_customers}</div>
            <div class="metric-delta positive">Monitoring</div>
        </div>
        """, unsafe_allow_html=True)


def _render_alerts():
    """Draw the five most recent alerts, newest first."""
    with alert_container:
        recent_alerts = st.session_state.alert_log[-5:]
        if not recent_alerts:
            st.info("No alerts yet...")
            return
        for alert in reversed(recent_alerts):
            # The stylesheet names its variants "alert-<level>", so the class
            # is built with that prefix rather than the bare level name.
            css_class = f"alert-{alert['risk_level'].lower()}"
            alert_html = f"""
            <div class="alert-box {css_class}">
                <strong>{alert['risk_level']} RISK ALERT</strong><br>
                Card: ****{alert['customer']}<br>
                Amount: ${alert['amount']:.2f}<br>
                Category: {alert['category']}<br>
                Score: {alert['score']}/100<br>
                Time: {alert['time'].strftime('%H:%M:%S')}
            </div>
            """
            st.markdown(alert_html, unsafe_allow_html=True)


def _render_chart():
    """Draw the amount/risk scatter plot and the risk-level bar chart."""
    with chart_container:
        if len(st.session_state.transaction_history) <= 1:
            return
        history_df = pd.DataFrame(st.session_state.transaction_history)

        # 1 row, 2 columns (side by side)
        fig = make_subplots(
            rows=1, cols=2,
            column_widths=[0.6, 0.4],
            subplot_titles=("Transaction Amounts with Risk Scores", "Risk Level Distribution"),
            horizontal_spacing=0.1
        )

        # Transaction scatter plot. Marker size grows with the score but has
        # a floor, otherwise low-score transactions are drawn with size ~0.
        fig.add_trace(
            go.Scatter(
                x=list(range(len(history_df))),
                y=history_df['amt'],
                mode='markers',
                marker=dict(
                    size=(history_df['fraud_score'] / 5).clip(lower=4),
                    color=history_df['fraud_score'],
                    colorscale='Reds',
                    showscale=True,
                    colorbar=dict(title="Risk Score")
                ),
                text=[f"${amt:.2f}<br>Score: {score}<br>Risk: {risk}"
                      for amt, score, risk in
                      zip(history_df['amt'], history_df['fraud_score'], history_df['risk_level'])],
                hoverinfo='text',
                name='Transactions'
            ),
            row=1, col=1
        )

        # Risk distribution bar chart. value_counts() orders by frequency, so
        # the counts are re-indexed into a fixed level order and each bar's
        # colour is looked up by its level rather than by position.
        risk_counts = history_df['risk_level'].value_counts().reindex(list(RISK_COLORS), fill_value=0)
        fig.add_trace(
            go.Bar(
                x=risk_counts.index,
                y=risk_counts.values,
                marker_color=[RISK_COLORS[level] for level in risk_counts.index],
                name='Risk Distribution'
            ),
            row=1, col=2
        )

        fig.update_layout(
            height=500,
            showlegend=False,
            plot_bgcolor='white',
            font=dict(family="Inter, sans-serif")
        )

        fig.update_xaxes(title_text="Transaction Index", row=1, col=1)
        fig.update_yaxes(title_text="Amount ($)", row=1, col=1, type="log")
        fig.update_xaxes(title_text="Risk Level", row=1, col=2)
        fig.update_yaxes(title_text="Count", row=1, col=2)

        st.plotly_chart(fig, use_container_width=True)


def _style_risk_level(val):
    """Return the CSS for one risk-level cell; unknown values use the SAFE colour."""
    background = RISK_COLORS.get(val, RISK_COLORS["SAFE"])
    return f'background-color: {background}; color: white; font-weight: bold;'


def _render_table():
    """Draw the 20 most recent transactions as a styled table."""
    with table_container:
        if not st.session_state.transaction_history:
            return
        recent_transactions = pd.DataFrame(st.session_state.transaction_history[-20:])
        display_df = recent_transactions[
            ['trans_date_trans_time', 'cc_num', 'amt', 'category', 'fraud_score', 'risk_level']].copy()

        # Format the display
        display_df['trans_date_trans_time'] = display_df['trans_date_trans_time'].dt.strftime(
            '%Y-%m-%d %H:%M:%S')
        display_df['cc_num'] = display_df['cc_num'].astype(str).str[-4:].apply(lambda x: f"****{x}")
        display_df['amt'] = display_df['amt'].apply(lambda x: f"${x:.2f}")
        display_df['fraud_score'] = display_df['fraud_score'].apply(lambda x: f"{x}/100")

        # Apply risk level styling. Styler.map replaces the deprecated
        # Styler.applymap in newer pandas; fall back when it is missing.
        styler = display_df.style
        apply_cellwise = getattr(styler, 'map', None) or styler.applymap
        styled_df = apply_cellwise(_style_risk_level, subset=['risk_level'])

        st.dataframe(
            styled_df,
            use_container_width=True,
            hide_index=True,
            column_config={
                "trans_date_trans_time": "Timestamp",
                "cc_num": "Card",
                "amt": "Amount",
                "category": "Category",
                "fraud_score": "Risk Score",
                "risk_level": "Risk Level"
            }
        )


# Process at most one transaction per script run.
processed_this_run = False
if st.session_state.is_running and st.session_state.current_row < len(df):
    transaction = df.iloc[st.session_state.current_row].to_dict()
    _process_transaction(transaction, threshold)
    st.session_state.current_row += 1
    processed_this_run = True

# Redraw the dashboard from session state.
_render_metrics()
_render_alerts()
_render_chart()
_render_table()

# Sleep and rerun only when something was processed; otherwise fall through to
# the rest of the script (paused, not started, or all rows done).
if processed_this_run:
    time.sleep(speed)
    st.rerun()

# Completion report: shown once the row cursor has reached the end of the
# dataset and at least one transaction was processed.
_all_rows_done = st.session_state.current_row >= len(df)
if _all_rows_done and st.session_state.total_processed > 0:
    st.success("✅ All transactions processed!")
    st.balloons()

    # Summary report: three side-by-side boxes
    st.markdown("### 📊 Final Report")
    summary_col1, summary_col2, summary_col3 = st.columns(3)
    summary_col1.info(f"**Total Transactions:** {st.session_state.total_processed:,}")
    summary_col2.warning(f"**Suspicious Transactions:** {st.session_state.total_fraudulent:,}")
    summary_col3.error(f"**Total Amount Flagged:** ${st.session_state.total_amount_flagged:,.2f}")

# Footer: horizontal rule followed by a centred HTML block
st.markdown("---")
st.markdown("""
<div style="text-align: center; color: #64748b; font-size: 14px;">
    <p>SecureBank Fraud Detection System v2.0 | Powered by Advanced ML Algorithms</p>
    <p>© 2024 SecureBank Corporation. All rights reserved.</p>
</div>
""", unsafe_allow_html=True)

Overwriting simulator_stream.py


In [7]:
from pyngrok import ngrok
import os
# Supply your ngrok token (https://dashboard.ngrok.com/get-started/your-authtoken)
# through the NGROK_AUTH_TOKEN environment variable, or type it at the hidden
# prompt. The secret is never written into the notebook, so it cannot leak
# when the notebook is shared or committed.
from getpass import getpass

NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
if not NGROK_AUTH_TOKEN.strip():
    raise ValueError("An ngrok auth token is required to open the tunnel.")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

In [None]:
# The app file is addressed by the same relative path the %%writefile cell
# wrote to, so the launch does not depend on the working directory being
# /content. The port is pinned so Streamlit always listens where the tunnel
# points.
APP_FILE = "simulator_stream.py"
STREAMLIT_PORT = 8501

# OPEN tunnel to ngrok
public_url = ngrok.connect(f"http://localhost:{STREAMLIT_PORT}")
print("🌐 Public URL:", public_url)

# Run the Streamlit app; this call blocks until the server is stopped.
os.system(f"streamlit run {APP_FILE} --server.port {STREAMLIT_PORT}")

🌐 Public URL: NgrokTunnel: "https://4b77-34-34-78-213.ngrok-free.app" -> "http://localhost:8501"
