In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib

# Load clean dataset
df = pd.read_csv('Dataset/clean_fraud_dataset.csv')

In [5]:
# Features & Target
X = df.drop(columns=['is_fraud', 'trans_date_trans_time', 'merchant', 'city'], errors='ignore')
X = X.select_dtypes(include=['number'])  # Ensures only numeric columns remain
y = df['is_fraud']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)

# Handle Imbalance with SMOTE
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print("✅ SMOTE applied successfully. Proceed to model training.")

✅ SMOTE applied successfully. Proceed to model training.


In [6]:
# Train XGBoost model
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model.fit(X_train_res, y_train_res)

# Save the list of final feature columns
joblib.dump(X_train_res.columns.tolist(), 'feature_list.pkl')
print("✅ Feature list saved as 'feature_list.pkl'")

# Predictions
y_pred = model.predict(X_test)

# Evaluation
print("Classification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nROC AUC Score:", roc_auc_score(y_test, y_pred))

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Feature list saved as 'feature_list.pkl'
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99    166072
           1       0.21      0.87      0.33       644

    accuracy                           0.99    166716
   macro avg       0.60      0.93      0.66    166716
weighted avg       1.00      0.99      0.99    166716


Confusion Matrix:
 [[163907   2165]
 [    81    563]]

ROC AUC Score: 0.9305935441007552


In [7]:
# Save Model
joblib.dump(model, 'trained_model.pkl')
print("✅ Model trained and saved as 'trained_model.pkl'")

✅ Model trained and saved as 'trained_model.pkl'
