<a href="https://colab.research.google.com/github/mdahmadullahmahmood/fraud-detection-streamlit/blob/main/fraud_detection_streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow scikit-learn joblib streamlit email-validator

Collecting streamlit
  Downloading streamlit-1.43.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting email-validator
  Downloading email_validator-2.2.0-py3-none-any.whl.metadata (25 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting dnspython>=2.0.0 (from email-validator)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading streamlit-1.43.2-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m64.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading email_validator-2.2.0-py3-none-any.whl (33 kB)
Downloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import joblib
import os
import streamlit as st


In [None]:
# Absolute locations of the persisted artifacts under /content.
# Note: the other cells visible in this notebook do not read these constants;
# they use the relative names "model.h5" / "scaler.pkl" instead.
SCALER_PATH = "/content/scaler.pkl"
MODEL_PATH = "/content/model.h5"


In [None]:
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

def load_data(
    file_path="/content/synthetic_fraud_dataset.csv",
    scaler_path="scaler.pkl",
    encoder_path="encoder.pkl",
):
    """Read the transaction CSV and build the autoencoder's input matrix.

    Three numerical columns are min-max scaled and ``Transaction_Type`` is
    one-hot encoded; the two results are stacked column-wise (numerical
    columns first). Both transformers are fitted on every row of the CSV,
    i.e. before any train/test split a caller performs afterwards.

    Args:
        file_path: CSV file to read.
        scaler_path: Where the fitted ``MinMaxScaler`` is written with joblib.
        encoder_path: Where the fitted ``OneHotEncoder`` is written with joblib.

    Returns:
        Tuple ``(X, scaler, encoder)`` where ``X`` is a 2-D NumPy array and the
        other two are the fitted transformers.

    Raises:
        ValueError: If the CSV lacks any of the required columns.
    """
    numerical_features = ["Transaction_Amount", "Account_Balance", "Risk_Score"]
    categorical_features = ["Transaction_Type"]
    selected_features = numerical_features + categorical_features

    data = pd.read_csv(file_path)

    # Fail with a readable message instead of a bare KeyError from the column selection.
    missing_columns = [col for col in selected_features if col not in data.columns]
    if missing_columns:
        raise ValueError(f"{file_path} is missing required columns: {missing_columns}")

    # Keep only the selected features
    data = data[selected_features]

    # process_transaction() casts Transaction_Type to str before encoding, so the
    # encoder must learn its categories from str values as well.
    categorical_frame = data[categorical_features].astype(str)

    # One-hot encode the categorical feature; categories unseen at fit time
    # encode to an all-zero row rather than raising.
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    categorical_data = encoder.fit_transform(categorical_frame)

    # Scale the numerical features into [0, 1]
    scaler = MinMaxScaler()
    numerical_data = scaler.fit_transform(data[numerical_features])

    # Combine processed numerical and categorical data (numerical columns first)
    X = np.hstack((numerical_data, categorical_data))

    # Save the scaler and encoder so inference can apply the same transforms
    joblib.dump(scaler, scaler_path)
    joblib.dump(encoder, encoder_path)

    return X, scaler, encoder

# Build the feature matrix once; the fitted scaler and encoder are kept in the
# kernel in addition to the copies load_data() writes to disk.
X, scaler, encoder = load_data()

# Sanity check: expect (rows, 3 scaled numerical columns + one-hot columns).
print("Processed Data Shape:", X.shape)


Processed Data Shape: (50000, 7)


In [None]:
def build_autoencoder(input_dim):
    """Create and compile a symmetric dense autoencoder.

    The network narrows 128 -> 64 -> 32 -> 16 (latent) and widens back
    32 -> 64 -> 128, all with ReLU, then maps to ``input_dim`` sigmoid units.

    Args:
        input_dim: Number of features per sample (width of input and output).

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss.
    """
    layers = tf.keras.layers
    encoder_widths = (128, 64, 32, 16)  # last entry is the latent size
    decoder_widths = (32, 64, 128)

    inputs = layers.Input(shape=(input_dim,))
    hidden = inputs
    for width in encoder_widths + decoder_widths:
        hidden = layers.Dense(width, activation="relu")(hidden)
    reconstruction = layers.Dense(input_dim, activation="sigmoid")(hidden)

    model = tf.keras.models.Model(inputs=inputs, outputs=reconstruction)
    model.compile(optimizer="adam", loss="mse")
    return model


In [None]:
def train_and_save_model(
    epochs=50,
    batch_size=64,
    test_size=0.2,
    percentile=95,
    model_path="model.h5",
    threshold_path="threshold.pkl",
    seed=42,
):
    """Train the autoencoder, save it, and derive the anomaly threshold.

    The data from ``load_data()`` is split into train/test parts, the model is
    fitted to reconstruct the training part, and the threshold is taken as a
    percentile of the per-sample reconstruction MSE on the held-out part.

    Args:
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        test_size: Fraction of rows held out for computing the threshold.
        percentile: Percentile of held-out reconstruction errors used as cutoff.
        model_path: File the trained model is saved to.
        threshold_path: File the threshold is dumped to with joblib, so that it
            survives the kernel and can be read by other processes. Pass
            ``None`` to skip writing it.
        seed: Seed for the data split and, when not ``None``, for Python,
            NumPy and TensorFlow random state.

    Returns:
        The threshold as a float.
    """
    if seed is not None:
        # Without this, weight initialisation and batch shuffling differ on every
        # run, so the resulting threshold is not reproducible.
        tf.keras.utils.set_random_seed(seed)

    # The fitted scaler/encoder are already written to disk by load_data().
    X, _, _ = load_data()
    X_train, X_test = train_test_split(X, test_size=test_size, random_state=seed)

    autoencoder = build_autoencoder(input_dim=X.shape[1])
    # Input and target are the same array: the network learns to reconstruct its input.
    autoencoder.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_split=0.2,
    )
    autoencoder.save(model_path)

    # Per-sample mean squared reconstruction error on the held-out rows.
    X_test_pred = autoencoder.predict(X_test)
    reconstruction_errors = np.mean(np.square(X_test - X_test_pred), axis=1)
    threshold = float(np.percentile(reconstruction_errors, percentile))

    if threshold_path:
        joblib.dump(threshold, threshold_path)

    return threshold

# Train once and keep the returned anomaly cutoff in the kernel; later cells
# pass it to process_transaction().
threshold = train_and_save_model()


Epoch 1/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 0.0576 - val_loss: 0.0013
Epoch 2/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0013 - val_loss: 9.5537e-04
Epoch 3/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 6.9373e-04 - val_loss: 2.6959e-04
Epoch 4/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 2.8958e-04 - val_loss: 2.6348e-04
Epoch 5/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 2.2116e-04 - val_loss: 2.5448e-04
Epoch 6/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 1.8317e-04 - val_loss: 1.6417e-04
Epoch 7/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 1.6118e-04 - val_loss: 1.5559e-04
Epoch 8/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.2276e-04 - val_loss:



[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [None]:
def detect_anomalies(autoencoder, X, threshold):
    """Flag samples whose reconstruction error exceeds ``threshold``.

    Args:
        autoencoder: Object with a ``predict(X)`` method returning an array of
            the same shape as ``X``.
        X: Feature rows, shape ``(n_samples, n_features)``. A single 1-D
            feature vector is accepted and treated as one row.
        threshold: Cutoff on the per-sample mean squared error.

    Returns:
        Tuple ``(anomalies, reconstruction_error)``: a boolean array that is
        True where the error is strictly greater than ``threshold``, and the
        per-sample error array, both of length ``n_samples``.
    """
    # A lone transaction may arrive as a 1-D vector; promote it to a single row
    # so that the axis=1 reduction below is valid.
    X = np.atleast_2d(np.asarray(X))

    X_pred = autoencoder.predict(X)
    reconstruction_error = np.mean(np.square(X - X_pred), axis=1)
    anomalies = reconstruction_error > threshold
    return anomalies, reconstruction_error


In [None]:
def process_transaction(transaction, threshold):
    """Score one transaction with the saved autoencoder.

    Args:
        transaction: Mapping with the keys ``Transaction_Amount``,
            ``Account_Balance``, ``Risk_Score`` and ``Transaction_Type``.
        threshold: Reconstruction-error cutoff above which the transaction is
            reported as anomalous.

    Returns:
        Tuple ``(alert_message, transaction_df)``: a human-readable verdict and
        the transaction as a one-row DataFrame.

    Raises:
        ValueError: If any required key is missing from ``transaction``.
    """
    required_numerical_features = ["Transaction_Amount", "Account_Balance", "Risk_Score"]
    required_categorical_features = ["Transaction_Type"]  # Must be categorical

    # Validate first so a malformed request fails before the model is read from disk.
    missing_features = [
        feat
        for feat in (required_numerical_features + required_categorical_features)
        if feat not in transaction
    ]
    if missing_features:
        raise ValueError(f"Missing required features: {missing_features}")

    # Only predict() is used, so the optimizer/loss state stored in the file is
    # not restored; this also avoids errors some Keras versions raise when
    # deserializing the compiled loss from a legacy HDF5 file.
    autoencoder = tf.keras.models.load_model("model.h5", compile=False)
    scaler = joblib.load("scaler.pkl")
    encoder = joblib.load("encoder.pkl")

    # One-row frame so the transformers receive the same column layout as at fit time.
    transaction_df = pd.DataFrame([transaction])

    # Encode the category as a string value.
    transaction_df["Transaction_Type"] = transaction_df["Transaction_Type"].astype(str)
    categorical_data = encoder.transform(transaction_df[required_categorical_features])

    # Scale numerical features with the saved scaler.
    numerical_data = scaler.transform(transaction_df[required_numerical_features])

    # Same column order as in training: numerical first, then one-hot columns.
    transaction_scaled = np.hstack((numerical_data, categorical_data))

    is_anomaly, error = detect_anomalies(autoencoder, transaction_scaled, threshold)

    if is_anomaly[0]:  # single row in, so index 0 holds the verdict
        alert_message = f"🚨 ALERT! Anomalous transaction detected. Reconstruction Error: {error[0]}"
    else:
        alert_message = "✅ Transaction is normal."

    return alert_message, transaction_df


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib
import tensorflow as tf
import numpy as np

st.title("Real-Time Fraud Detection System")

amount = st.number_input("Transaction Amount", min_value=0.0, step=0.01)
transaction_type = st.selectbox("Transaction Type", ["POS", "Online", "ATM Withdrawal", "Bank Transfer"])
account_balance = st.number_input("Account Balance", min_value=0.0, step=0.01)
risk_score = st.number_input("Risk Score", min_value=0.0, step=0.01)

if st.button("Check for Fraud"):
    transaction = {
        "Transaction_Amount": amount,
        "Transaction_Type": transaction_type,
        "Account_Balance": account_balance,
        "Risk_Score": risk_score
    }
    alert_message, transaction_df = process_transaction(transaction,threshold)
    st.write(alert_message)
    st.dataframe(transaction_df)
else:
   st.error("Please fill in all fields.")



Overwriting app.py


In [None]:
from google.colab import files

# Hand each saved artifact to the browser as a download, in the same order as before.
for artifact_name in ("model.h5", "scaler.pkl", "encoder.pkl"):
    files.download(artifact_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Print this VM's public IPv4 address to the cell output (-q: quiet, -O -: write to stdout).
# NOTE(review): presumably needed as the access password on the localtunnel page — confirm.
!wget -q -O - ipv4.icanhazip.com

34.171.7.53


In [None]:
# Start the Streamlit server in the background (&), then open a localtunnel
# to port 8501, the port the Streamlit output below reports it is serving on.
!streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.171.7.53:8501[0m
[0m
[1G[0K⠇[1G[0K⠏[1G[0Kyour url is: https://yellow-dryers-sin.loca.lt
2025-03-18 12:17:07.102834: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742300227.137051   34507 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742300227.147011   34507 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS w