In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix
from xgboost import XGBClassifier
import seaborn as sns
import pickle
%matplotlib inline

In [2]:
df=pd.read_csv(r"D:\ML-Fraud-Detection\notebooks\AIML Dataset.csv")

In [3]:
df1=df.drop(['isFlaggedFraud','nameDest','nameOrig'],axis=1)

In [4]:
fraudTransactions = df.loc[df.isFraud == 1].type.drop_duplicates().values 
print(list(fraudTransactions))

['TRANSFER', 'CASH_OUT']


In [5]:
dfTransactions = df.loc[(df.type == 'TRANSFER') | (df.type == 'CASH_OUT')] 
dfFraud = dfTransactions['isFraud']
del dfTransactions['isFraud']

In [6]:
dfTransactions.loc[dfTransactions.type == 'TRANSFER', 'type'] = 0 
dfTransactions.loc[dfTransactions.type == 'CASH_OUT', 'type'] = 1 
dfTransactions.type = dfTransactions.type.astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfTransactions.type = dfTransactions.type.astype(int)


In [7]:
dfTransactionsFraud = dfTransactions.loc[dfFraud == 1] 
dfTransactionsNonFraud = dfTransactions.loc[dfFraud == 0] 
fractionAnomalyTransactionsInFraud = len(dfTransactionsFraud.loc[
                                                                (dfTransactionsFraud.oldbalanceDest == 0)
                                                                & (dfTransactionsFraud.newbalanceDest == 0) 
                                                                & (dfTransactionsFraud.amount)
                                                                ]) / (1.0 * len(dfTransactionsFraud)) 
print("Part of anomaly transactions among fraudulent: ",fractionAnomalyTransactionsInFraud )
fractionAnomalyTransactionsInNonFraud = len(dfTransactionsNonFraud.loc[ (dfTransactionsNonFraud.oldbalanceDest == 0) & (dfTransactionsNonFraud.newbalanceDest == 0) & (dfTransactionsNonFraud.amount) ]) / (1.0 * len(dfTransactionsNonFraud)) 
print("Part of anomaly transactions among regular (non-fraudulent): ",fractionAnomalyTransactionsInNonFraud )

Part of anomaly transactions among fraudulent:  0.4955558261293072
Part of anomaly transactions among regular (non-fraudulent):  0.0006176245277308345


In [8]:
dfTransactions.loc[
                   (dfTransactions.oldbalanceDest == 0) 
                   & (dfTransactions.newbalanceDest == 0) 
                   & (dfTransactions.amount != 0),  
                   ['oldbalanceDest', 'newbalanceDest']] = - 1

In [9]:
dfTransactions.loc[ 
                   (dfTransactions.oldbalanceOrg == 0)
                   & (dfTransactions.newbalanceOrig == 0) & (dfTransactions.amount != 0), 
                   ['oldbalanceOrg', 'newbalanceOrig']] = np.nan

In [10]:
dfTransactions['errorbalanceDest'] = dfTransactions.oldbalanceDest + dfTransactions.amount - dfTransactions.newbalanceDest
dfTransactions['errorbalanceOrig'] = dfTransactions.newbalanceOrig + dfTransactions.amount - dfTransactions.oldbalanceOrg

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfTransactions['errorbalanceDest'] = dfTransactions.oldbalanceDest + dfTransactions.amount - dfTransactions.newbalanceDest
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfTransactions['errorbalanceOrig'] = dfTransactions.newbalanceOrig + dfTransactions.amount - dfTransactions.oldbalanceOrg


In [11]:
dfTransactions.drop(['step','nameOrig','nameDest','isFlaggedFraud'],axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfTransactions.drop(['step','nameOrig','nameDest','isFlaggedFraud'],axis=1,inplace=True)


In [12]:
df1=dfTransactions.drop(['errorbalanceDest',
 'errorbalanceOrig'],axis=1)

In [13]:
list(df1)

['type',
 'amount',
 'oldbalanceOrg',
 'newbalanceOrig',
 'oldbalanceDest',
 'newbalanceDest']

In [14]:
randomState = 5
np.random.seed(randomState)
trainX, testX, trainY, testY = train_test_split(dfTransactions, dfFraud, test_size = 0.2, random_state = randomState )

In [15]:
weights = (dfFraud == 0).sum() / (1.0 * (dfFraud == 1).sum()) 
classifier = XGBClassifier(max_depth = 3, scale_pos_weight = weights, n_jobs = 4)
predictions = classifier.fit(trainX, trainY).predict_proba(testX) 

In [16]:
preddf=classifier.predict(testX)
from sklearn.metrics import classification_report 

print(classification_report(testY,preddf))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    552412
           1       0.84      1.00      0.91      1670

    accuracy                           1.00    554082
   macro avg       0.92      1.00      0.96    554082
weighted avg       1.00      1.00      1.00    554082



In [18]:
import joblib 
joblib.dump(classifier,'fraud_xgb_model.pkl')

['fraud_xgb_model.pkl']

In [19]:
import streamlit as st
import pandas as pd
import joblib

# Load trained XGBoost model
model = joblib.load("fraud_xgb_model.pkl")

st.title("Fraud Detection with XGBoost")

# --- User Inputs ---
amount = st.number_input("Transaction Amount", min_value=0.0, step=100.0)
oldbalanceOrg = st.number_input("Old Balance (Origin)", min_value=0.0, step=100.0)
newbalanceOrig = st.number_input("New Balance (Origin)", min_value=0.0, step=100.0)
oldbalanceDest = st.number_input("Old Balance (Destination)", min_value=0.0, step=100.0)
newbalanceDest = st.number_input("New Balance (Destination)", min_value=0.0, step=100.0)

# Transaction type (0 = TRANSFER, 1 = CASH_OUT)
transaction_type = st.selectbox("Transaction Type", ["TRANSFER (0)", "CASH_OUT (1)"])
type_value = 0 if "TRANSFER" in transaction_type else 1

# --- Build input DataFrame (one step with engineered features) ---
user_data = pd.DataFrame([{
    "amount": amount,
    "oldbalanceOrg": oldbalanceOrg,
    "newbalanceOrig": newbalanceOrig,
    "oldbalanceDest": oldbalanceDest,
    "newbalanceDest": newbalanceDest,
    "type": type_value,  # transaction type encoded
    "errorbalanceDest": oldbalanceDest + amount - newbalanceDest,
    "errorbalanceOrig": newbalanceOrig + amount - oldbalanceOrg
}])

# --- Predict ---
prediction = model.predict(user_data)[0]

# --- Display results ---
if prediction == 1:
    st.error("🚨 Fraud Detected")
else:
    st.success("✅ Not Fraudulent")


2025-09-09 16:12:19.492 
  command:

    streamlit run C:\Users\swaya\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


ValueError: feature_names mismatch: ['type', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'errorbalanceDest', 'errorbalanceOrig'] ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'type', 'errorbalanceDest', 'errorbalanceOrig']