In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest, RandomForestClassifier
from sklearn.metrics import classification_report, average_precision_score
import xgboost as xgb

# Load data
data = pd.read_csv(r"C:\Users\hp\Downloads\creditcard.csv")

# Separate features and target
X = data.drop('Class', axis=1)
y = data['Class']

# Split data (time-based split would be better)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale Amount and Time
scaler = StandardScaler()
X_train[['Amount', 'Time']] = scaler.fit_transform(X_train[['Amount', 'Time']])
X_test[['Amount', 'Time']] = scaler.transform(X_test[['Amount', 'Time']])

# Option 1: Isolation Forest (anomaly detection)
iso_forest = IsolationForest(n_estimators=100, contamination=0.0017, random_state=42)
iso_forest.fit(X_train)
y_pred_iso = iso_forest.predict(X_test)
y_pred_iso = [1 if x == -1 else 0 for x in y_pred_iso] # Convert to 0/1

# Option 2: Random Forest with class weights
rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Option 3: XGBoost with scale_pos_weight
scale_pos_weight = len(y_train[y_train==0])/len(y_train[y_train==1])
xgb_model = xgb.XGBClassifier(scale_pos_weight=scale_pos_weight, random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate models
print("Isolation Forest:")
print(classification_report(y_test, y_pred_iso))
print("AUPRC:", average_precision_score(y_test, y_pred_iso))

print("\nRandom Forest:")
print(classification_report(y_test, y_pred_rf))
print("AUPRC:", average_precision_score(y_test, y_pred_rf))

print("\nXGBoost:")
print(classification_report(y_test, y_pred_xgb))
print("AUPRC:", average_precision_score(y_test, y_pred_xgb))

Isolation Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28427
           1       0.28      0.31      0.30        54

    accuracy                           1.00     28481
   macro avg       0.64      0.66      0.65     28481
weighted avg       1.00      1.00      1.00     28481

AUPRC: 0.09049664255269162

Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28427
           1       0.98      0.83      0.90        54

    accuracy                           1.00     28481
   macro avg       0.99      0.92      0.95     28481
weighted avg       1.00      1.00      1.00     28481

AUPRC: 0.8155333914447924

XGBoost:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28427
           1       0.94      0.85      0.89        54

    accuracy                           1.00     28481
   macro avg       0.97      0.

In [None]:
#Random forest turns out to be the best model for fraud detection followed by XGBoost.
#Use Isolation Forest for initial anomaly screening → Flag suspicious cases.
#Run Random Forest/XGBoost only on flagged cases → Reduce computational cost.



In [2]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.2-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   - -------------------------------------- 5.0/150.0 MB 27.7 MB/s eta 0:00:06
   --- ------------------------------------ 11.3/150.0 MB 29.4 MB/s eta 0:00:05
   ---- ----------------------------------- 17.0/150.0 MB 28.7 MB/s eta 0:00:05
   ------ --------------------------------- 23.3/150.0 MB 29.2 MB/s eta 0:00:05
   -------- ------------------------------- 30.1/150.0 MB 29.5 MB/s eta 0:00:05
   --------- ------------------------------ 36.7/150.0 MB 29.8 MB/s eta 0:00:04
   ---------- ----------------------------- 37.7/150.0 MB 29.8 MB/s eta 0:00:04
   ----------- ---------------------------- 43.8/150.0 MB 26.5 MB/s eta 0:00:05
   ------------- -------------------------- 50.6/150.0 MB 27.3 MB/s eta 0:00:04
   --------------- ------------------------ 57.1/150.0 MB 27.6


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
