In [13]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [14]:
# Read the transactions CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="Fraud.csv")
data.head(n=5)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [15]:
 #Check for Missing Values
# Number of null entries in each column.
missing_per_column = data.isna().sum()
print(missing_per_column)

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64


In [16]:
# Count how many transactions fall into each transaction type.
# The counts are stored as `type_counts` rather than `type`, so the built-in
# type() is not shadowed for the rest of the notebook.
type_counts = data["type"].value_counts()
transactions = type_counts.index
quantity = type_counts.values

# Donut chart (hole=0.5) showing the share of each transaction type.
# The labels and slice sizes are passed directly as arrays, so the full
# transactions frame is not handed to plotly: neither argument names one of
# its columns.
figure = px.pie(
    values=quantity,
    names=transactions,
    hole=0.5,
    title="Distribution of Transaction Type",
)
figure.show()

In [17]:
# Checking correlation
# Correlate the numeric columns with each other, then list every column's
# correlation with the isFraud label from highest to lowest.
numeric_columns = data.select_dtypes(include='number')
correlation = numeric_columns.corr()
fraud_correlation = correlation["isFraud"].sort_values(ascending=False)
print(fraud_correlation)

isFraud           1.000000
amount            0.076688
isFlaggedFraud    0.044109
step              0.031578
oldbalanceOrg     0.010154
newbalanceDest    0.000535
oldbalanceDest   -0.005885
newbalanceOrig   -0.008148
Name: isFraud, dtype: float64


In [18]:
# Encode the type feature as an integer code.
# This cell overwrites data["type"], so a bare .map() is only correct the
# first time it runs: running it again would look the integer codes up in a
# mapping keyed by strings and turn the whole column into NaN. The dtype
# guard makes the cell safe to re-run.
type_codes = {
    "CASH_OUT": 1,
    "PAYMENT": 2,
    "CASH_IN": 3,
    "TRANSFER": 4,
    "DEBIT": 5,
}
if not pd.api.types.is_numeric_dtype(data["type"]):
    encoded_type = data["type"].map(type_codes)
    # .map() yields NaN for any label missing from the mapping; fail loudly
    # instead of passing silently-missing values on to the model.
    unmapped = data.loc[encoded_type.isna(), "type"].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Unmapped transaction types: {sorted(map(str, unmapped))}"
        )
    data["type"] = encoded_type.astype("int64")

In [19]:
# Target variable: isFraud is passed through an identity lookup on {0, 1}.
# Values 0 and 1 are kept as they are; any other value would become NaN.
fraud_labels = {0: 0, 1: 1}
data["isFraud"] = data["isFraud"].map(fraud_labels)

In [20]:
# Build the model inputs: a 2-D array of the four feature columns and a 1-D
# array holding the isFraud label.
feature_columns = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
x = np.array(data[feature_columns])
y = np.array(data["isFraud"])

In [21]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle so
# the split is the same on every run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

In [22]:
# Train XGBoost model
# `use_label_encoder` is intentionally not passed: it is an obsolete option
# that current XGBoost releases no longer accept as a parameter (they only
# warn that it is unused). The target is already the integer labels 0/1, so
# no label encoding is needed.
# eval_metric='logloss' sets the evaluation metric; random_state fixes the seed.
model = XGBClassifier(eval_metric='logloss', random_state=42)
model.fit(xtrain, ytrain)

In [23]:
# Predict and evaluate
# Predicted labels for the held-out test rows; the evaluation cell below
# compares them against ytest.
ypred = model.predict(xtest)

In [25]:
# Overall accuracy on the test set, followed by the per-class
# precision / recall / F1 report.
accuracy = accuracy_score(ytest, ypred)
report = classification_report(ytest, ypred)
print("Accuracy:", accuracy)
print(" Classification Report:\n", report)

Accuracy: 0.9995552146757154
 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270904
           1       0.88      0.75      0.81      1620

    accuracy                           1.00   1272524
   macro avg       0.94      0.88      0.91   1272524
weighted avg       1.00      1.00      1.00   1272524



In [27]:
# Score one hand-built transaction. The column order must match the training
# features: type, amount, oldbalanceOrg, newbalanceOrig.
# Input: type=TRANSFER (4), amount=9000.60, oldbalanceOrg=9000.60, newbalanceOrig=0.0
features = np.array([[4, 9000.60, 9000.60, 0.0]])
prediction = model.predict(features)

# The model returns one label per input row; 1 means fraud.
if prediction[0] == 1:
    verdict = "Fraud"
else:
    verdict = "No Fraud"
print("Prediction:", verdict)

Prediction: Fraud
