In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import average_precision_score, roc_auc_score, confusion_matrix, classification_report

# Load Datasets

In [2]:
X_train = pd.read_csv('../data/X_train.csv')
y_train = pd.read_csv('../data/y_train.csv')
X_train_resampled = pd.read_csv('../data/X_train_resampled.csv')
y_train_resampled = pd.read_csv('../data/y_train_resampled.csv')
X_val = pd.read_csv('../data/X_val.csv')
y_val = pd.read_csv('../data/y_val.csv')
X_test = pd.read_csv('../data/X_test.csv')
y_test = pd.read_csv('../data/y_test.csv')

# Use Log-Transformed Features

In [3]:
log_transformed_columns = ['days_since_request_log', 'intended_balcon_amount_log',
                           'zip_count_4w_log', 'velocity_24h_log', 'velocity_4w_log',
                           'date_of_birth_distinct_emails_4w_log','session_length_in_minutes_log']
original_columns = ['days_since_request', 'intended_balcon_amount_clean',
                    'zip_count_4w', 'velocity_24h', 'velocity_4w',
                    'date_of_birth_distinct_emails_4w', 'session_length_in_minutes_cleaned']

In [4]:
X_train_resampled_log = X_train_resampled.drop(columns=original_columns)
X_train_log = X_train.drop(columns=original_columns)
X_val_log = X_val.drop(columns=original_columns)
X_test_log = X_test.drop(columns=original_columns)

## Use SMOTE resampled Training Data

### Baseline

In [14]:
# Baseline XGBoost model
xgb_baseline = XGBClassifier(
    n_estimators=300,          # number of trees
    max_depth=6,               # tree depth
    learning_rate=0.2,         # step size shrinkage
    eval_metric='auc',     # evaluation metric
    random_state=42,
    n_jobs=-1                  # use all CPU cores
)

# Fit on training data
xgb_baseline.fit(X_train_resampled_log, y_train_resampled.values.ravel())

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [6]:
# Evaluate on Train Set
y_train_pred = xgb_baseline.predict(X_train_resampled_log)
y_train_proba = xgb_baseline.predict_proba(X_train_resampled_log)[:, 1]

# Classification report
print("Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion matrix
print("Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

Training Classification Report:

              precision    recall  f1-score   support

           0     0.9911    0.9997    0.9954    786838
           1     0.9997    0.9910    0.9954    786838

    accuracy                         0.9954   1573676
   macro avg     0.9954    0.9954    0.9954   1573676
weighted avg     0.9954    0.9954    0.9954   1573676

Training ROC-AUC Score: 0.9996
Training PR-AUC Score: 0.9996
Training Confusion Matrix:
 [[786630    208]
 [  7043 779795]]


In [7]:
# Evaluate on Validation Set
y_val_pred = xgb_baseline.predict(X_val_log)
y_val_proba = xgb_baseline.predict_proba(X_val_log)[:, 1]

# Classification report
print("\nValidation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion matrix
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9875    0.9983    0.9929    106718
           1     0.3599    0.0717    0.1196      1450

    accuracy                         0.9858    108168
   macro avg     0.6737    0.5350    0.5562    108168
weighted avg     0.9791    0.9858    0.9812    108168

Validation ROC-AUC Score: 0.8789
Validation PR-AUC Score: 0.1490
Validation Confusion Matrix:
 [[106533    185]
 [  1346    104]]


In [15]:
# Evaluate on Test Set
y_test_pred = xgb_baseline.predict(X_test_log)
y_test_proba = xgb_baseline.predict_proba(X_test_log)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9864    0.9985    0.9924     95415
           1     0.4406    0.0805    0.1362      1428

    accuracy                         0.9849     96843
   macro avg     0.7135    0.5395    0.5643     96843
weighted avg     0.9784    0.9849    0.9798     96843

Test ROC-AUC Score: 0.8729
Test PR-AUC Score: 0.1768
Test Confusion Matrix:
 [[95269   146]
 [ 1313   115]]


### Hyperparameter Tuning

In [9]:
import optuna

def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 800),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 0.5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "eval_metric": "auc",
        "random_state": 42,
        "n_jobs": -1
    }

    model = XGBClassifier(**params)

    model.fit(
        X_train_resampled_log, 
        y_train_resampled.values.ravel(),
        eval_set=[(X_val_log, y_val)],
        verbose=False
    )

    # Predict on validation
    preds = model.predict_proba(X_val_log)[:, 1]
    auc = roc_auc_score(y_val, preds)

    return auc

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

print("Best params:", study.best_params)
print("Best AUC:", study.best_value)

[I 2025-11-08 20:22:39,036] A new study created in memory with name: no-name-d08f9e6b-b7cb-40e9-b914-79eba0cfa3fd
[I 2025-11-08 20:22:58,396] Trial 0 finished with value: 0.8893273603457645 and parameters: {'n_estimators': 602, 'max_depth': 5, 'learning_rate': 0.09519720139712477, 'subsample': 0.672171637740313, 'colsample_bytree': 0.6765453247250416, 'gamma': 0.38207639826386824, 'min_child_weight': 10}. Best is trial 0 with value: 0.8893273603457645.
[I 2025-11-08 20:23:21,681] Trial 1 finished with value: 0.8644349917378125 and parameters: {'n_estimators': 513, 'max_depth': 9, 'learning_rate': 0.1451563703473274, 'subsample': 0.7019463479975395, 'colsample_bytree': 0.6707323037397678, 'gamma': 0.35382553907270564, 'min_child_weight': 4}. Best is trial 0 with value: 0.8893273603457645.
[I 2025-11-08 20:23:34,729] Trial 2 finished with value: 0.8835272141661136 and parameters: {'n_estimators': 303, 'max_depth': 8, 'learning_rate': 0.07780931339221629, 'subsample': 0.612189365614459, '

Best params: {'n_estimators': 743, 'max_depth': 5, 'learning_rate': 0.07662531300380107, 'subsample': 0.8026500715550352, 'colsample_bytree': 0.6002985430277121, 'gamma': 0.30577919189570324, 'min_child_weight': 10}
Best AUC: 0.8907202546705432


In [10]:
# Extract best parameters
best_params = study.best_params  # from your Optuna study

# Optional: add fixed parameters
best_params.update({
    "eval_metric": "auc",
    "random_state": 42,
    "n_jobs": -1
})

# Train best model on training set
final_model = XGBClassifier(**best_params)
final_model.fit(X_train_resampled_log, y_train_resampled.values.ravel())

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6002985430277121
,device,
,early_stopping_rounds,
,enable_categorical,False


In [11]:
# Evaluate final model on train set
y_train_final_pred = final_model.predict(X_train_resampled_log)
y_train_final_proba = final_model.predict_proba(X_train_resampled_log)[:, 1]

# classification report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_final_pred, digits=4))

# roc auc
roc_auc_train_final = roc_auc_score(y_train_resampled, y_train_final_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train_final:.4f}")

# pr auc
pr_auc_train_final = average_precision_score(y_train_resampled, y_train_final_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train_final:.4f}")

# confusion matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_final_pred))

Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9902    0.9995    0.9948    786838
           1     0.9995    0.9901    0.9948    786838

    accuracy                         0.9948   1573676
   macro avg     0.9949    0.9948    0.9948   1573676
weighted avg     0.9949    0.9948    0.9948   1573676

Final Model Training ROC-AUC Score: 0.9993
Final Model Training PR-AUC Score: 0.9994
Final Model Training Confusion Matrix:
 [[786453    385]
 [  7779 779059]]


In [12]:
# Evaluate final model on validation set
y_val_final_pred = final_model.predict(X_val_log)
y_val_final_proba = final_model.predict_proba(X_val_log)[:, 1]

# classification report
print("\nFinal Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_final_pred, digits=4))

# roc auc
roc_auc_val_final = roc_auc_score(y_val, y_val_final_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val_final:.4f}")

# pr auc
pr_auc_val_final = average_precision_score(y_val, y_val_final_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val_final:.4f}")

# confusion matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_final_pred))


Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9875    0.9985    0.9929    106718
           1     0.3731    0.0669    0.1135      1450

    accuracy                         0.9860    108168
   macro avg     0.6803    0.5327    0.5532    108168
weighted avg     0.9792    0.9860    0.9811    108168

Final Model Validation ROC-AUC Score: 0.8907
Final Model Validation PR-AUC Score: 0.1650
Final Model Validation Confusion Matrix:
 [[106555    163]
 [  1353     97]]


In [13]:
# Evaluate on Test Set
y_test_pred = xgb_baseline.predict(X_test_log)
y_test_proba = xgb_baseline.predict_proba(X_test_log)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9864    0.9985    0.9924     95415
           1     0.4406    0.0805    0.1362      1428

    accuracy                         0.9849     96843
   macro avg     0.7135    0.5395    0.5643     96843
weighted avg     0.9784    0.9849    0.9798     96843

Test ROC-AUC Score: 0.8729
Test PR-AUC Score: 0.1768
Test Confusion Matrix:
 [[95269   146]
 [ 1313   115]]


## Use original train with class weighting

### Baseline

In [16]:
# Extract 1D labels
y = y_train['fraud_bool'].values

# Compute class weight correctly
pos = (y == 1).sum()
neg = (y == 0).sum()
scale = neg / pos

# Baseline XGBoost model
xgb_baseline = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.2,
    eval_metric='auc',
    random_state=42,
    scale_pos_weight=scale,   # float, not Series
    n_jobs=-1
)

# Fit
xgb_baseline.fit(X_train_log, y)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [17]:
# Evaluate on Train Set
y_train_pred = xgb_baseline.predict(X_train_log)
y_train_proba = xgb_baseline.predict_proba(X_train_log)[:, 1]

# Classification report
print("Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion matrix
print("Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

Training Classification Report:

              precision    recall  f1-score   support

           0     0.9999    0.9233    0.9600    786838
           1     0.1177    0.9879    0.2103      8151

    accuracy                         0.9239    794989
   macro avg     0.5588    0.9556    0.5851    794989
weighted avg     0.9908    0.9239    0.9523    794989

Training ROC-AUC Score: 0.9869
Training PR-AUC Score: 0.4345
Training Confusion Matrix:
 [[726450  60388]
 [    99   8052]]


In [18]:
# Evaluate on Validation Set
y_val_pred = xgb_baseline.predict(X_val_log)
y_val_proba = xgb_baseline.predict_proba(X_val_log)[:, 1]  

# Classification report
print("\nValidation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion matrix
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9942    0.9020    0.9459    106718
           1     0.0786    0.6152    0.1394      1450

    accuracy                         0.8982    108168
   macro avg     0.5364    0.7586    0.5426    108168
weighted avg     0.9820    0.8982    0.9351    108168

Validation ROC-AUC Score: 0.8667
Validation PR-AUC Score: 0.1508
Validation Confusion Matrix:
 [[96262 10456]
 [  558   892]]


In [19]:
# Evaluate on Test Set
y_test_pred = xgb_baseline.predict(X_test_log)
y_test_proba = xgb_baseline.predict_proba(X_test_log)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9912    0.9489    0.9696     95415
           1     0.1136    0.4370    0.1803      1428

    accuracy                         0.9414     96843
   macro avg     0.5524    0.6930    0.5749     96843
weighted avg     0.9783    0.9414    0.9580     96843

Test ROC-AUC Score: 0.8431
Test PR-AUC Score: 0.1375
Test Confusion Matrix:
 [[90544  4871]
 [  804   624]]


### Hyperparameter Tuning

In [20]:
import optuna
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score

def objective(trial):

    # Suggest hyperparameters
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 800),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
        
        # Tune scale_pos_weight around the base imbalance ratio
        "scale_pos_weight": trial.suggest_float(
            "scale_pos_weight", 
            scale * 0.5, 
            scale * 2.0
        ),

        "early_stopping_rounds": 50,
        "eval_metric": "auc",
        "random_state": 42,
        "n_jobs": -1,
    }

    model = XGBClassifier(**params)

    model.fit(
        X_train_log, 
        y,
        eval_set=[(X_val_log, y_val)],
        verbose=False
    )

    # Predict on validation
    preds = model.predict_proba(X_val_log)[:, 1]
    auc = roc_auc_score(y_val, preds)

    return auc


In [21]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)
print("Best params:", study.best_params)

[I 2025-11-08 20:45:37,609] A new study created in memory with name: no-name-ebf09814-34ad-4a3d-a1e3-956f2401cdc4
[I 2025-11-08 20:45:40,394] Trial 0 finished with value: 0.890139953121698 and parameters: {'n_estimators': 606, 'max_depth': 4, 'learning_rate': 0.29291565992410284, 'subsample': 0.7101386627897796, 'colsample_bytree': 0.9898785308832807, 'gamma': 2.4134734661688912, 'min_child_weight': 17, 'scale_pos_weight': 174.7533526696692}. Best is trial 0 with value: 0.890139953121698.
[I 2025-11-08 20:45:47,309] Trial 1 finished with value: 0.8769869478761622 and parameters: {'n_estimators': 367, 'max_depth': 10, 'learning_rate': 0.036322949663834286, 'subsample': 0.8196392526966025, 'colsample_bytree': 0.9910628179757812, 'gamma': 3.297344245092242, 'min_child_weight': 18, 'scale_pos_weight': 142.83252613812937}. Best is trial 0 with value: 0.890139953121698.
[I 2025-11-08 20:45:59,679] Trial 2 finished with value: 0.8927052185876925 and parameters: {'n_estimators': 605, 'max_dept

Best params: {'n_estimators': 294, 'max_depth': 3, 'learning_rate': 0.09315320452369848, 'subsample': 0.7565807206346981, 'colsample_bytree': 0.6825077723092992, 'gamma': 1.4957570716034694, 'min_child_weight': 4, 'scale_pos_weight': 115.61355973727945}


In [22]:
# Extract best parameters
best_params = study.best_params  # from Optuna study

# Train final model on full training set
final_model = XGBClassifier(**best_params)
final_model.fit(X_train_log, y)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.6825077723092992
,device,
,early_stopping_rounds,
,enable_categorical,False


In [23]:
# Evaluate on Train Set
y_train_pred = final_model.predict(X_train_log)
y_train_proba = final_model.predict_proba(X_train_log)[:, 1]

# Classification report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}") 

# Confusion matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9982    0.8062    0.8920    786838
           1     0.0440    0.8620    0.0838      8151

    accuracy                         0.8067    794989
   macro avg     0.5211    0.8341    0.4879    794989
weighted avg     0.9884    0.8067    0.8837    794989

Final Model Training ROC-AUC Score: 0.9134
Final Model Training PR-AUC Score: 0.1775
Final Model Training Confusion Matrix:
 [[634326 152512]
 [  1125   7026]]


In [24]:
# Evaluate on Validation Set
y_val_pred = final_model.predict(X_val_log)
y_val_proba = final_model.predict_proba(X_val_log)[:, 1]

# Classification report
print("\nFinal Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9975    0.7670    0.8672    106718
           1     0.0476    0.8566    0.0902      1450

    accuracy                         0.7682    108168
   macro avg     0.5225    0.8118    0.4787    108168
weighted avg     0.9847    0.7682    0.8568    108168

Final Model Validation ROC-AUC Score: 0.8940
Final Model Validation PR-AUC Score: 0.1828
Final Model Validation Confusion Matrix:
 [[81857 24861]
 [  208  1242]]


In [25]:
# Evaluate on Test Set
y_test_pred = final_model.predict(X_test_log)
y_test_proba = final_model.predict_proba(X_test_log)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9969    0.7937    0.8838     95415
           1     0.0571    0.8347    0.1069      1428

    accuracy                         0.7943     96843
   macro avg     0.5270    0.8142    0.4953     96843
weighted avg     0.9830    0.7943    0.8723     96843

Test ROC-AUC Score: 0.8964
Test PR-AUC Score: 0.2192
Test Confusion Matrix:
 [[75732 19683]
 [  236  1192]]


# Use Non Log-Transformed Features

In [5]:
X_train_resampled_nolog = X_train_resampled.drop(columns=log_transformed_columns)
X_train_nolog = X_train.drop(columns=log_transformed_columns)
X_val_nolog = X_val.drop(columns=log_transformed_columns)
X_test_nolog = X_test.drop(columns=log_transformed_columns)

## Use SMOTE resampled Training Data

### Baseline

In [27]:
# Baseline XGBoost model
xgb_baseline = XGBClassifier(
    n_estimators=300,          # number of trees
    max_depth=6,               # tree depth
    learning_rate=0.2,         # step size shrinkage
    eval_metric='auc',     # evaluation metric
    random_state=42,
    n_jobs=-1                  # use all CPU cores
)

# Fit on training data
xgb_baseline.fit(X_train_resampled_nolog, y_train_resampled.values.ravel())

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [28]:
# Evaluate on Train Set
y_train_pred = xgb_baseline.predict(X_train_resampled_nolog)
y_train_proba = xgb_baseline.predict_proba(X_train_resampled_nolog)[:, 1]

# Classification report
print("Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion matrix
print("Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

Training Classification Report:

              precision    recall  f1-score   support

           0     0.9911    0.9995    0.9953    786838
           1     0.9995    0.9911    0.9953    786838

    accuracy                         0.9953   1573676
   macro avg     0.9953    0.9953    0.9953   1573676
weighted avg     0.9953    0.9953    0.9953   1573676

Training ROC-AUC Score: 0.9996
Training PR-AUC Score: 0.9996
Training Confusion Matrix:
 [[786467    371]
 [  7040 779798]]


In [29]:
# Evaluate on validation Set
y_val_pred = xgb_baseline.predict(X_val_nolog)
y_val_proba = xgb_baseline.predict_proba(X_val_nolog)[:, 1]

# Classification report
print("\nValidation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))
# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Validation ROC-AUC Score: {roc_auc_val:.4f}")
# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Validation PR-AUC Score: {pr_auc_val:.4f}")
# Confusion matrix
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9876    0.9981    0.9928    106718
           1     0.3500    0.0772    0.1266      1450

    accuracy                         0.9857    108168
   macro avg     0.6688    0.5376    0.5597    108168
weighted avg     0.9790    0.9857    0.9812    108168

Validation ROC-AUC Score: 0.8816
Validation PR-AUC Score: 0.1482
Validation Confusion Matrix:
 [[106510    208]
 [  1338    112]]


In [30]:
# Evaluate on Test Set
y_test_pred = xgb_baseline.predict(X_test_nolog)
y_test_proba = xgb_baseline.predict_proba(X_test_nolog)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9862    0.9985    0.9923     95415
           1     0.3966    0.0658    0.1129      1428

    accuracy                         0.9847     96843
   macro avg     0.6914    0.5322    0.5526     96843
weighted avg     0.9775    0.9847    0.9793     96843

Test ROC-AUC Score: 0.8722
Test PR-AUC Score: 0.1695
Test Confusion Matrix:
 [[95272   143]
 [ 1334    94]]


### Hyperparameter Tuning

In [33]:
# Define objective function for Optuna
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 800),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 0.5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "eval_metric": "auc",
        "random_state": 42,
        "n_jobs": -1
    }

    model = XGBClassifier(**params)

    model.fit(
        X_train_resampled_nolog, 
        y_train_resampled.values.ravel(),
        eval_set=[(X_val_nolog, y_val)],
        verbose=False
    )

    # Predict on validation
    preds = model.predict_proba(X_val_nolog)[:, 1]
    auc = roc_auc_score(y_val, preds)

    return auc

In [34]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)
print("Best params:", study.best_params)
print("Best AUC:", study.best_value)

[I 2025-11-08 20:56:07,442] A new study created in memory with name: no-name-2ec885e4-33f7-44a1-b40c-0cc5dcd55b24
[I 2025-11-08 20:56:28,403] Trial 0 finished with value: 0.8605027914367933 and parameters: {'n_estimators': 674, 'max_depth': 4, 'learning_rate': 0.012079130208694536, 'subsample': 0.8073276520814414, 'colsample_bytree': 0.8751714909346767, 'gamma': 0.16480461846310113, 'min_child_weight': 10}. Best is trial 0 with value: 0.8605027914367933.
[I 2025-11-08 20:56:40,083] Trial 1 finished with value: 0.883032533050366 and parameters: {'n_estimators': 370, 'max_depth': 5, 'learning_rate': 0.20603762130318634, 'subsample': 0.9495619494330149, 'colsample_bytree': 0.7000139941348612, 'gamma': 0.4899668403238577, 'min_child_weight': 1}. Best is trial 1 with value: 0.883032533050366.
[I 2025-11-08 20:56:49,090] Trial 2 finished with value: 0.8794333115119384 and parameters: {'n_estimators': 241, 'max_depth': 6, 'learning_rate': 0.22031616355723949, 'subsample': 0.9043905897428742, 

Best params: {'n_estimators': 712, 'max_depth': 4, 'learning_rate': 0.09986848975318872, 'subsample': 0.7597662645423839, 'colsample_bytree': 0.836636691377709, 'gamma': 0.3685431876571106, 'min_child_weight': 7}
Best AUC: 0.8895065144295858


In [35]:
# Extract best parameters
best_params = study.best_params  # from Optuna study

# Train best model on training set
final_model = XGBClassifier(**best_params)
final_model.fit(X_train_resampled_nolog, y_train_resampled.values.ravel())

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.836636691377709
,device,
,early_stopping_rounds,
,enable_categorical,False


In [36]:
# Evaluate on Train Set
y_train_pred = final_model.predict(X_train_resampled_nolog)
y_train_proba = final_model.predict_proba(X_train_resampled_nolog)[:, 1]

# Classification report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))
# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")
# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")
# Confusion matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9899    0.9990    0.9944    786838
           1     0.9990    0.9898    0.9944    786838

    accuracy                         0.9944   1573676
   macro avg     0.9944    0.9944    0.9944   1573676
weighted avg     0.9944    0.9944    0.9944   1573676

Final Model Training ROC-AUC Score: 0.9991
Final Model Training PR-AUC Score: 0.9993
Final Model Training Confusion Matrix:
 [[786052    786]
 [  8056 778782]]


In [37]:
# Evaluate on Validation Set
y_val_pred = final_model.predict(X_val_nolog)
y_val_proba = final_model.predict_proba(X_val_nolog)[:, 1]
# Classification report
print("\nFinal Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))
# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")
# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")
# Confusion matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9877    0.9979    0.9928    106718
           1     0.3671    0.0876    0.1414      1450

    accuracy                         0.9857    108168
   macro avg     0.6774    0.5428    0.5671    108168
weighted avg     0.9794    0.9857    0.9814    108168

Final Model Validation ROC-AUC Score: 0.8894
Final Model Validation PR-AUC Score: 0.1623
Final Model Validation Confusion Matrix:
 [[106499    219]
 [  1323    127]]


In [38]:
# Evaluate on Test Set
y_test_pred = final_model.predict(X_test_nolog)
y_test_proba = final_model.predict_proba(X_test_nolog)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9863    0.9986    0.9924     95415
           1     0.4380    0.0742    0.1269      1428

    accuracy                         0.9849     96843
   macro avg     0.7122    0.5364    0.5597     96843
weighted avg     0.9782    0.9849    0.9796     96843

Test ROC-AUC Score: 0.8842
Test PR-AUC Score: 0.1900
Test Confusion Matrix:
 [[95279   136]
 [ 1322   106]]


## Use original train with class weighting

### Baseline

In [6]:
# Extract 1D labels
y = y_train['fraud_bool'].values

# Compute class weight correctly
pos = (y == 1).sum()
neg = (y == 0).sum()
scale = neg / pos

# Baseline XGBoost model
xgb_baseline = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.2,
    eval_metric='auc',
    random_state=42,
    scale_pos_weight=scale,   # float, not Series
    n_jobs=-1
)

# Fit
xgb_baseline.fit(X_train_nolog, y)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [7]:
# Evaluate on Validation Set
y_val_pred = xgb_baseline.predict(X_val_nolog)
y_val_proba = xgb_baseline.predict_proba(X_val_nolog)[:, 1]  

# Classification report
print("\nValidation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion matrix
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9941    0.9053    0.9476    106718
           1     0.0794    0.6014    0.1403      1450

    accuracy                         0.9012    108168
   macro avg     0.5367    0.7533    0.5439    108168
weighted avg     0.9818    0.9012    0.9368    108168

Validation ROC-AUC Score: 0.8661
Validation PR-AUC Score: 0.1438
Validation Confusion Matrix:
 [[96608 10110]
 [  578   872]]


In [8]:
# Evaluate on Test Set
y_test_pred = xgb_baseline.predict(X_test_nolog)
y_test_proba = xgb_baseline.predict_proba(X_test_nolog)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9911    0.9518    0.9711     95415
           1     0.1175    0.4286    0.1844      1428

    accuracy                         0.9441     96843
   macro avg     0.5543    0.6902    0.5777     96843
weighted avg     0.9782    0.9441    0.9595     96843

Test ROC-AUC Score: 0.8454
Test PR-AUC Score: 0.1350
Test Confusion Matrix:
 [[90818  4597]
 [  816   612]]


### Hyperparameter Tuning

In [13]:
import optuna
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score

def objective(trial):

    # Suggest hyperparameters
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 800),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
        
        # Tune scale_pos_weight around the base imbalance ratio
        "scale_pos_weight": trial.suggest_float(
            "scale_pos_weight", 
            scale * 0.5, 
            scale * 2.0
        ),

        "early_stopping_rounds": 50,
        "eval_metric": "auc",
        "random_state": 42,
        "n_jobs": -1,
    }

    model = XGBClassifier(**params)

    model.fit(
        X_train_nolog, 
        y,
        eval_set=[(X_val_nolog, y_val)],
        verbose=False
    )

    # Predict on validation
    preds = model.predict_proba(X_val_nolog)[:, 1]
    auc = roc_auc_score(y_val, preds)

    return auc

In [14]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)
print("Best params:", study.best_params)

[I 2025-11-10 14:54:52,782] A new study created in memory with name: no-name-c178e9ae-18b2-4239-b752-7867f4beb353
[I 2025-11-10 14:55:02,774] Trial 0 finished with value: 0.8919360176449568 and parameters: {'n_estimators': 452, 'max_depth': 6, 'learning_rate': 0.0399941378145437, 'subsample': 0.8209946780281052, 'colsample_bytree': 0.6133022249718514, 'gamma': 1.4303431977334284, 'min_child_weight': 11, 'scale_pos_weight': 135.98354671415194}. Best is trial 0 with value: 0.8919360176449568.
[I 2025-11-10 14:55:05,359] Trial 1 finished with value: 0.856017166738507 and parameters: {'n_estimators': 616, 'max_depth': 10, 'learning_rate': 0.255208994711477, 'subsample': 0.6633452047213713, 'colsample_bytree': 0.7091433134635231, 'gamma': 2.2628438331995566, 'min_child_weight': 11, 'scale_pos_weight': 167.26277126757253}. Best is trial 0 with value: 0.8919360176449568.
[I 2025-11-10 14:55:09,433] Trial 2 finished with value: 0.8926208098559465 and parameters: {'n_estimators': 313, 'max_dept

Best params: {'n_estimators': 668, 'max_depth': 4, 'learning_rate': 0.041220581254456184, 'subsample': 0.8866972561799367, 'colsample_bytree': 0.8225361744537156, 'gamma': 2.977080553654553, 'min_child_weight': 11, 'scale_pos_weight': 56.656079473016156}


In [15]:
# Extract best parameters
best_params = study.best_params  # from Optuna study

# Train final model on full training set
final_model = XGBClassifier(**best_params)
final_model.fit(X_train_nolog, y)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.8225361744537156
,device,
,early_stopping_rounds,
,enable_categorical,False


In [16]:
# Evaluate on Train Set
y_train_pred = final_model.predict(X_train_nolog)
y_train_proba = final_model.predict_proba(X_train_nolog)[:, 1]

# Classification report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}") 

# Confusion matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9974    0.8985    0.9454    786838
           1     0.0732    0.7734    0.1337      8151

    accuracy                         0.8972    794989
   macro avg     0.5353    0.8360    0.5395    794989
weighted avg     0.9879    0.8972    0.9371    794989

Final Model Training ROC-AUC Score: 0.9290
Final Model Training PR-AUC Score: 0.1976
Final Model Training Confusion Matrix:
 [[706998  79840]
 [  1847   6304]]


In [17]:
# Evaluate on Validation Set
y_val_pred = final_model.predict(X_val_nolog)
y_val_proba = final_model.predict_proba(X_val_nolog)[:, 1]

# Classification report
print("\nFinal Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9960    0.8675    0.9273    106718
           1     0.0710    0.7455    0.1297      1450

    accuracy                         0.8658    108168
   macro avg     0.5335    0.8065    0.5285    108168
weighted avg     0.9836    0.8658    0.9166    108168

Final Model Validation ROC-AUC Score: 0.8946
Final Model Validation PR-AUC Score: 0.1815
Final Model Validation Confusion Matrix:
 [[92576 14142]
 [  369  1081]]


In [18]:
# Evaluate on Test Set
y_test_pred = final_model.predict(X_test_nolog)
y_test_proba = final_model.predict_proba(X_test_nolog)[:, 1]
# Classification report
print("\nTest Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))
# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Test ROC-AUC Score: {roc_auc_test:.4f}")
# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Test PR-AUC Score: {pr_auc_test:.4f}")
# Confusion matrix
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Test Classification Report:

              precision    recall  f1-score   support

           0     0.9952    0.8842    0.9364     95415
           1     0.0847    0.7157    0.1514      1428

    accuracy                         0.8817     96843
   macro avg     0.5399    0.8000    0.5439     96843
weighted avg     0.9818    0.8817    0.9249     96843

Test ROC-AUC Score: 0.8971
Test PR-AUC Score: 0.2150
Test Confusion Matrix:
 [[84368 11047]
 [  406  1022]]
