In [2]:
import pandas as pd

# Quick look at the raw export: list the column names so the modelling
# cell can pick its features from what is actually present in the file.
csv_path = r"C:\Users\saksh\Desktop\bank_transactions_data_2.csv"
data = pd.read_csv(csv_path)
print(data.columns)


Index(['TransactionID', 'AccountID', 'TransactionAmount', 'TransactionDate',
       'TransactionType', 'Location', 'DeviceID', 'IP Address', 'MerchantID',
       'Channel', 'CustomerAge', 'CustomerOccupation', 'TransactionDuration',
       'LoginAttempts', 'AccountBalance', 'PreviousTransactionDate'],
      dtype='object')


In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import IsolationForest
import joblib

# Train an unsupervised anomaly detector (Isolation Forest) on the bank
# transactions export, flag the rows it considers anomalous, then persist
# both the fitted pipeline and the annotated dataset.

# --- Configuration -----------------------------------------------------
# NOTE(review): absolute, machine-specific path — consider a configurable
# data directory so the notebook can run on another machine.
DATA_PATH = r"C:\Users\saksh\Desktop\bank_transactions_data_2.csv"
MODEL_PATH = "unsupervised_upi_fraud_iforest.pkl"
PREDICTIONS_PATH = "bank_transactions_with_fraud_predictions.csv"

N_ESTIMATORS = 200
CONTAMINATION = 0.05   # expected share of anomalies; sets the decision threshold
RANDOM_STATE = 42      # fixed seed so the forest is reproducible

# IsolationForest.predict returns -1 for anomalies and 1 for inliers;
# re-encode as 1 = fraud, 0 = normal.
LABEL_TO_FLAG = {1: 0, -1: 1}

# --- Load data ---------------------------------------------------------
data = pd.read_csv(DATA_PATH)

# --- Feature engineering -----------------------------------------------
tx_time = pd.to_datetime(data['TransactionDate'])
prev_time = pd.to_datetime(data['PreviousTransactionDate'])

data['TransactionDate'] = tx_time
data['PrevDate'] = prev_time
data['tx_hour'] = tx_time.dt.hour
data['tx_dayofweek'] = tx_time.dt.dayofweek
# Signed gap in seconds between the two timestamps; rows where the gap
# cannot be computed (missing timestamp) are set to 0.
# NOTE(review): the value is negative whenever PreviousTransactionDate is
# later than TransactionDate — confirm that is intended for this dataset.
data['time_since_prev'] = (tx_time - prev_time).dt.total_seconds().fillna(0)

# --- Features to use ---------------------------------------------------
numeric_features = ["TransactionAmount", "CustomerAge", "LoginAttempts",
                    "AccountBalance", "TransactionDuration",
                    "tx_hour", "tx_dayofweek", "time_since_prev"]

categorical_features = ["TransactionType", "Location", "Channel", "CustomerOccupation"]

X = data[numeric_features + categorical_features]

# --- Preprocessing -----------------------------------------------------
# Numeric columns are standardised; categorical columns are one-hot encoded.
# handle_unknown="ignore" lets the saved pipeline score rows containing
# categories that were not present at fit time.
preprocessor = ColumnTransformer(transformers=[
    ("num", StandardScaler(), numeric_features),
    ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), categorical_features)
])

# --- Pipeline: preprocessing followed by the Isolation Forest -----------
clf = Pipeline(steps=[
    ("pre", preprocessor),
    ("iforest", IsolationForest(n_estimators=N_ESTIMATORS,
                                contamination=CONTAMINATION,
                                random_state=RANDOM_STATE))
])

# --- Train model -------------------------------------------------------
clf.fit(X)

# --- Predict anomalies -------------------------------------------------
# Call predict on the pipeline itself: it applies the fitted preprocessing
# and then the forest, so the steps never have to be chained by hand.
data['fraud_prediction'] = pd.Series(clf.predict(X), index=data.index).map(LABEL_TO_FLAG)

# --- Persist artefacts -------------------------------------------------
# The whole pipeline (preprocessor + forest) is saved, so it can be loaded
# later and applied directly to a frame with the same feature columns.
joblib.dump(clf, MODEL_PATH)

# Dataset with the engineered columns and the 0/1 fraud flag.
data.to_csv(PREDICTIONS_PATH, index=False)

print("✅ Model trained and fraud predictions saved!")


✅ Model trained and fraud predictions saved!
