In [3]:
import numpy as np
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report
)
from joblib import Parallel, delayed
import pyswarms as ps

# Load data
df = pd.read_csv(r"C:\Users\KIIT\Desktop\Fraud_Detection\model\card_transdata.csv")

# Sampling
sample_size = 100000
df_sampled = df.groupby("fraud", group_keys=False).apply(
    lambda x: x.sample(int(sample_size * len(x) / len(df)), random_state=42)
).reset_index(drop=True)

# Train-test split
X = df_sampled.drop(columns=["fraud"])
y = df_sampled["fraud"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# PSO fitness function
def evaluate_particle(particle):
    selected = np.where(particle > 0.75)[0]
    if len(selected) == 0:
        return 1e10
    X_train_sel = X_train.iloc[:, selected]
    X_test_sel = X_test.iloc[:, selected]
    model = RandomForestClassifier(n_estimators=100, class_weight='balanced_subsample', random_state=42, n_jobs=1)
    model.fit(X_train_sel, y_train)
    preds = model.predict(X_test_sel)
    fraud_recall = recall_score(y_test, preds, pos_label=1)
    accuracy = accuracy_score(y_test, preds)
    return 1 - (0.6 * fraud_recall + 0.4 * accuracy)

def fitness_function(particles):
    return np.array(Parallel(n_jobs=-1)(
        delayed(evaluate_particle)(p) for p in particles
    ))

# Run PSO
options = {'c1': 2, 'c2': 2, 'w': 0.9}
dimensions = X.shape[1]
optimizer = ps.single.GlobalBestPSO(n_particles=20, dimensions=dimensions, options=options)

print("\n🔄 Running PSO optimization...")
start_time = time.time()
best_cost, best_pos = optimizer.optimize(fitness_function, iters=50)
pso_time = time.time() - start_time

selected = np.where(best_pos > 0.75)[0]
selected_feature_names = X.columns[selected]
print("\n🧠 Selected Feature Indices:", selected)
print("📝 Selected Feature Names:", list(selected_feature_names))
X_train_sel = X_train.iloc[:, selected]
X_test_sel = X_test.iloc[:, selected]

# Train final model
model = RandomForestClassifier(n_estimators=100, class_weight='balanced_subsample', random_state=42, n_jobs=-1)
model.fit(X_train_sel, y_train)

# Evaluate
y_pred = model.predict(X_test_sel)
y_proba = model.predict_proba(X_test_sel)[:, 1]

print("\n📊 Final Model Performance:")
print(f"Accuracy : {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred):.4f}")
print(f"Recall   : {recall_score(y_test, y_pred):.4f}")
print(f"F1 Score : {f1_score(y_test, y_pred):.4f}")
print(f"AUC-ROC  : {roc_auc_score(y_test, y_proba):.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# Save model and selected features
joblib.dump({
    "model": model,
    "selected_indices": selected,
    "selected_feature_names": list(X.columns[selected])
}, "rf_pso_model.pkl")
print("\n✅ Model saved to 'rf_pso_model.pkl'")

  df_sampled = df.groupby("fraud", group_keys=False).apply(
2025-04-10 13:12:00,032 - pyswarms.single.global_best - INFO - Optimize for 50 iters with {'c1': 2, 'c2': 2, 'w': 0.9}



🔄 Running PSO optimization...


pyswarms.single.global_best: 100%|██████████|50/50, best_cost=0.00109
2025-04-10 16:04:51,191 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.001089748283752856, best pos: [ 1.27301658  1.63613408  0.75511403 -0.87720702  4.95000903  2.81962125
  1.6521314 ]



📊 Final Model Performance:
Accuracy : 0.9999
Precision: 1.0000
Recall   : 0.9983
F1 Score : 0.9991
AUC-ROC  : 1.0000

Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     18252
         1.0       1.00      1.00      1.00      1748

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000


✅ Model saved to 'rf_pso_model.pkl'
