In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
# Read the processed PaySim CSV from the current working directory and
# preview its first five rows.
processed_csv = "paysim_processed.csv"
df = pd.read_csv(processed_csv)
df.head(5)

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud,balance_delta_org,balance_delta_dest,hour,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER
0,278,330218.42,20866.0,351084.42,452419.57,122201.15,0,0,-330218.42,-330218.42,14,False,False,False,False
1,15,11647.08,30370.0,18722.92,0.0,0.0,0,0,11647.08,0.0,15,False,False,True,False
2,10,152264.21,106589.0,258853.21,201303.01,49038.8,0,0,-152264.21,-152264.21,10,False,False,False,False
3,403,1551760.63,0.0,0.0,3198359.45,4750120.08,0,0,0.0,1551760.63,19,False,False,False,True
4,206,78172.3,2921331.58,2999503.88,415821.9,337649.6,0,0,-78172.3,-78172.3,14,False,False,False,False


In [3]:
# Frame dimensions as a (row count, column count) tuple.
(len(df.index), len(df.columns))

(150000, 15)

In [4]:
# Per-column count of missing values.
df.isnull().sum(axis=0)

step                  0
amount                0
oldbalanceOrg         0
newbalanceOrig        0
oldbalanceDest        0
newbalanceDest        0
isFraud               0
isFlaggedFraud        0
balance_delta_org     0
balance_delta_dest    0
hour                  0
type_CASH_OUT         0
type_DEBIT            0
type_PAYMENT          0
type_TRANSFER         0
dtype: int64

In [5]:
# Columns fed to the anomaly model. The transaction-type dummies, `step`, and
# the `isFraud` / `isFlaggedFraud` labels are not part of this list.
# The order is kept fixed because the model is fitted with a fixed seed.
balance_columns = [
    "oldbalanceOrg",
    "newbalanceOrig",
    "oldbalanceDest",
    "newbalanceDest",
    "balance_delta_org",
    "balance_delta_dest",
]
ml_features = ["amount", *balance_columns, "hour"]

# Feature matrix: the selected columns of df, in the order listed above.
X = df[ml_features]

In [6]:
# Unsupervised anomaly model: standardize the features, then fit an
# Isolation Forest on the scaled values.
# NOTE(review): `contamination` only sets the threshold used by
# predict()/decision_function(); score_samples() does not depend on it —
# confirm the 2% assumption is actually meant to be applied somewhere.
isolation_forest = IsolationForest(
    n_estimators=200,
    contamination=0.02,  # assume ~2% anomalies
    random_state=42,  # fixed seed for reproducible trees
    n_jobs=-1,  # use all available processors
)
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("iforest", isolation_forest),
    ]
)

pipeline.fit(X)


In [7]:
# Score every row with the fitted steps: scale the features, then ask the
# forest for its per-sample score.
fitted_scaler = pipeline.named_steps["scaler"]
fitted_forest = pipeline.named_steps["iforest"]
raw_scores = fitted_forest.score_samples(fitted_scaler.transform(X))

# score_samples is lower for more abnormal rows, so negate it to make a
# higher value mean "more anomalous".
flipped_scores = pd.Series(-raw_scores, index=df.index)

# Min-max rescale onto the 0–1 range and store the result on the frame.
score_min, score_max = flipped_scores.min(), flipped_scores.max()
df["ml_fraud_score"] = (flipped_scores - score_min) / (score_max - score_min)


In [8]:
# Preview the first five rows, now including the `ml_fraud_score` column.
df.head(5)

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud,balance_delta_org,balance_delta_dest,hour,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER,ml_fraud_score
0,278,330218.42,20866.0,351084.42,452419.57,122201.15,0,0,-330218.42,-330218.42,14,False,False,False,False,0.214322
1,15,11647.08,30370.0,18722.92,0.0,0.0,0,0,11647.08,0.0,15,False,False,True,False,0.007399
2,10,152264.21,106589.0,258853.21,201303.01,49038.8,0,0,-152264.21,-152264.21,10,False,False,False,False,0.124148
3,403,1551760.63,0.0,0.0,3198359.45,4750120.08,0,0,0.0,1551760.63,19,False,False,False,True,0.430251
4,206,78172.3,2921331.58,2999503.88,415821.9,337649.6,0,0,-78172.3,-78172.3,14,False,False,False,False,0.226824


In [9]:
# Binary flag: 1 when the normalized score is strictly above the cutoff, else 0.
# NOTE(review): this fixed 0.8 cutoff is unrelated to the `contamination`
# value given to IsolationForest — confirm the threshold is intentional.
anomaly_cutoff = 0.8
df["ml_anomaly_flag"] = df["ml_fraud_score"].gt(anomaly_cutoff).astype(int)

In [10]:
# Preview the first five rows, now including the `ml_anomaly_flag` column.
df.head(5)

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud,balance_delta_org,balance_delta_dest,hour,type_CASH_OUT,type_DEBIT,type_PAYMENT,type_TRANSFER,ml_fraud_score,ml_anomaly_flag
0,278,330218.42,20866.0,351084.42,452419.57,122201.15,0,0,-330218.42,-330218.42,14,False,False,False,False,0.214322,0
1,15,11647.08,30370.0,18722.92,0.0,0.0,0,0,11647.08,0.0,15,False,False,True,False,0.007399,0
2,10,152264.21,106589.0,258853.21,201303.01,49038.8,0,0,-152264.21,-152264.21,10,False,False,False,False,0.124148,0
3,403,1551760.63,0.0,0.0,3198359.45,4750120.08,0,0,0.0,1551760.63,19,False,False,False,True,0.430251,0
4,206,78172.3,2921331.58,2999503.88,415821.9,337649.6,0,0,-78172.3,-78172.3,14,False,False,False,False,0.226824,0


In [11]:
# How many rows fall on each side of the cutoff. In the saved run only 640 of
# 150000 rows (~0.43%) are flagged, well under the 2% contamination assumed
# when the forest was configured.
df["ml_anomaly_flag"].value_counts()

ml_anomaly_flag
0    149360
1       640
Name: count, dtype: int64

In [12]:
# Cross-tabulate the model flag (rows) against the true fraud label (columns).
# In the saved run, 12 flagged rows are real fraud, 628 flagged rows are not,
# and 180 fraud rows go unflagged.
pd.crosstab(index=df["ml_anomaly_flag"], columns=df["isFraud"])


isFraud,0,1
ml_anomaly_flag,Unnamed: 1_level_1,Unnamed: 2_level_1
0,149180,180
1,628,12


In [13]:
# Write the scored frame (original columns plus `ml_fraud_score` and
# `ml_anomaly_flag`) to CSV without the index column.
# A relative path is used, matching how "paysim_processed.csv" is read, so the
# notebook is not tied to an absolute, user-specific Windows directory.
# NOTE(review): assumes the notebook is run from the project directory —
# confirm downstream consumers look for the file there.
df.to_csv("paysim_with_ml_scores.csv", index=False)