In [2]:
# fraud_detection

import os
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
# Load and combine data
# Every *.pkl file in `data_folder_path` is read in sorted filename order and
# the resulting frames are stacked into one DataFrame with a fresh 0..n-1 index.
# NOTE(security): pd.read_pickle can execute arbitrary code while unpickling,
# so only point this at files that come from a trusted source.
data_folder_path = "data"

pickle_filenames = sorted(
    name for name in os.listdir(data_folder_path) if name.endswith('.pkl')
)

# Fail early with an actionable message instead of pandas' generic
# "No objects to concatenate" error when the folder holds no pickles.
if not pickle_filenames:
    raise ValueError(f"No .pkl files found in {data_folder_path!r}")

Dataset = []
for filename in pickle_filenames:
    file_path = os.path.join(data_folder_path, filename)
    df = pd.read_pickle(file_path)
    Dataset.append(df)

full_df = pd.concat(Dataset, ignore_index=True)

In [4]:
# Preview the first five rows of the combined transaction table
full_df.head(n=5)

Unnamed: 0,TRANSACTION_ID,TX_DATETIME,CUSTOMER_ID,TERMINAL_ID,TX_AMOUNT,TX_TIME_SECONDS,TX_TIME_DAYS,TX_FRAUD,TX_FRAUD_SCENARIO
0,0,2018-04-01 00:00:31,596,3156,57.16,31,0,0,0
1,1,2018-04-01 00:02:10,4961,3412,81.51,130,0,0,0
2,2,2018-04-01 00:07:56,2,1365,146.0,476,0,0,0
3,3,2018-04-01 00:09:29,4128,8737,64.49,569,0,0,0
4,4,2018-04-01 00:10:34,927,9906,50.99,634,0,0,0


In [5]:
# Preprocessing
# Parse the transaction timestamp once and store it back as a datetime column.
tx_timestamp = pd.to_datetime(full_df['TX_DATETIME'])
full_df['TX_DATETIME'] = tx_timestamp

# Derive calendar features from the parsed timestamp:
# hour of day, day of month, and weekday as given by the pandas `.dt` accessor.
for feature_name, dt_attribute in (
    ('HOUR', 'hour'),
    ('DAY', 'day'),
    ('WEEKDAY', 'weekday'),
):
    full_df[feature_name] = getattr(tx_timestamp.dt, dt_attribute)



In [6]:
# Train/test data
# Model inputs are the amount plus the three calendar features; the label is
# the TX_FRAUD column.
features = ['TX_AMOUNT', 'HOUR', 'DAY', 'WEEKDAY']
target = 'TX_FRAUD'
X, y = full_df[features], full_df[target]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [7]:
# Baseline model: logistic regression with scikit-learn's default settings
print(" Logistic Regression Results ")
log_model = LogisticRegression().fit(X_train, y_train)  # fit() returns the estimator itself
log_preds = log_model.predict(X_test)

# Confusion matrix of the held-out labels against the model's predictions
log_confusion = confusion_matrix(y_test, log_preds)
print(log_confusion)

 Logistic Regression Results 
[[521905      0]
 [  3901    441]]


In [8]:
# Per-class precision, recall and F1 for the logistic regression predictions
log_report = classification_report(y_test, log_preds)
print(log_report)

              precision    recall  f1-score   support

           0       0.99      1.00      1.00    521905
           1       1.00      0.10      0.18      4342

    accuracy                           0.99    526247
   macro avg       1.00      0.55      0.59    526247
weighted avg       0.99      0.99      0.99    526247



In [9]:
# Random forest classifier
print("\n--- Random Forest Results ---")
# A random forest is trained with randomised sampling, so without a fixed seed
# every run fits a different model and neither the metrics printed here nor
# any model saved from `rf` can be reproduced. Pin the seed.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

# Evaluate on the held-out split: confusion matrix, then per-class metrics
print(confusion_matrix(y_test, rf_preds))
print(classification_report(y_test, rf_preds))


--- Random Forest Results ---
[[521470    435]
 [  3370    972]]
              precision    recall  f1-score   support

           0       0.99      1.00      1.00    521905
           1       0.69      0.22      0.34      4342

    accuracy                           0.99    526247
   macro avg       0.84      0.61      0.67    526247
weighted avg       0.99      0.99      0.99    526247



In [10]:
# Persist the trained random forest to disk; the value returned by joblib.dump
# (the list of written filenames) is displayed as the cell result.
joblib.dump(value=rf, filename='fraud_model.pkl')

['fraud_model.pkl']

In [11]:
# Custom sample test
# Values follow the order of `features`: TX_AMOUNT, HOUR, DAY, WEEKDAY.
# The forest was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# carrying the same column names. A bare list makes scikit-learn warn that
# "X does not have valid feature names" and silently relies on positional order.
sample = pd.DataFrame([[250, 10, 3, 2]], columns=features)
prediction = rf.predict(sample)
print("Sample prediction :", prediction)

Sample prediction : [1]




In [12]:
# Report the predicted class for the single sample row
sample_label = prediction[0]
print(f"\nSample Prediction (1 = fraud, 0 = legit): {sample_label}")


Sample Prediction (1 = fraud, 0 = legit): 1


In [1]:
%%writefile app.py

import streamlit as st
import joblib
import numpy as np

# Load trained model
model = joblib.load('fraud_model.pkl')

# Title
st.title("💳 Fraud Transaction Detection")

# Sidebar for input
st.sidebar.header("Enter Transaction Details")

amount = st.sidebar.number_input("Transaction Amount", min_value=0.0, step=1.0)
hour = st.sidebar.slider("Hour (0-23)", 0, 23)
day = st.sidebar.slider("Day (1-31)", 1, 31)
weekday = st.sidebar.selectbox("Weekday (0=Mon, 6=Sun)", list(range(7)))

# Prediction button
if st.sidebar.button("Predict Fraud"):
    input_data = np.array([[amount, hour, day, weekday]])
    prediction = model.predict(input_data)[0]
    
    if prediction == 1:
        st.error("⚠️ This transaction is FRAUDULENT!")
    else:
        st.success("✅ This transaction is LEGITIMATE.")

Overwriting app.py
