# Import Libraries

In [1]:
import os
import optuna
import pickle
import mlflow
import joblib
import numpy as np
import mlflow.sklearn
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
from mlflow.tracking import MlflowClient
from imblearn.over_sampling import SMOTE
from optuna.integration.mlflow import MLflowCallback
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, f1_score, recall_score, precision_score

# Load Data

In [2]:
X_train, X_test, Y_train, Y_test = joblib.load("../data/processed/split_data.pkl")

# Create New MLflow Experiment

In [3]:
mlruns_path = os.path.abspath("../mlruns")
mlflow.set_tracking_uri(f"file:..///{mlruns_path}")
mlflc = MLflowCallback(
    tracking_uri=mlflow.get_tracking_uri(),
    metric_name="profit"
)

study = optuna.create_study(
    study_name="XGB + SMOTE Profit Tuning",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
    storage="sqlite:///../mlruns/xgb_smote_profit_tuning.db",
    load_if_exists=True
)

  mlflc = MLflowCallback(
[I 2025-05-27 22:34:30,503] A new study created in RDB with name: XGB + SMOTE Profit Tuning


# Hyperparameter Tuning XGBoost + SMOTE Model (Bayesian Search)

In [4]:
AVG_FRAUD, TP_FEE, FP_COST = 120, 0.10, -5
FN_PEN_TUNE = -0.25

X_dev, X_val, y_dev, y_val = train_test_split(
X_train, Y_train, test_size=0.15, stratify=Y_train, random_state=42
)
y_dev_np = y_dev.to_numpy()

def profit(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return TP_FEE * tp * AVG_FRAUD + FP_COST * fp + FN_PEN_TUNE * fn * AVG_FRAUD

def objective(trial):
    params = {
    "n_estimators": trial.suggest_int("n_estimators", 300, 1200),
    "learning_rate": trial.suggest_float("learning_rate", 0.0005, 0.2, log=True),
    "max_depth": trial.suggest_int("max_depth", 3, 9),
    "min_child_weight": trial.suggest_float("min_child_weight", 0.01, 10.0, log=True),
    "gamma": trial.suggest_float("gamma", 0.0, 0.5),
    "subsample": trial.suggest_float("subsample", 0.5, 1.0),
    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
    "max_delta_step": trial.suggest_int("max_delta_step", 0, 10),
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    "random_state": 42,
    "n_jobs": -1,
    "tree_method": "hist"
    }

    model = xgb.XGBClassifier(**params)
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = []

    for train_idx, val_idx in cv.split(X_dev, y_dev_np):
        X_tr, y_tr = X_dev.iloc[train_idx], y_dev_np[train_idx]
        X_va, y_va = X_dev.iloc[val_idx], y_dev_np[val_idx]

        smote = SMOTE(random_state=42)
        X_tr_res, y_tr_res = smote.fit_resample(X_tr, y_tr)

        model.fit(X_tr_res, y_tr_res, eval_set=[(X_va, y_va)], verbose=False)
        preds = (model.predict_proba(X_va)[:, 1] >= 0.5).astype(int)
        scores.append(profit(y_va, preds))

    return np.mean(scores)

study.optimize(objective, n_trials=50, timeout=6000, callbacks=[mlflc], show_progress_bar=True)

print("\nBest Cross-Validated Profit:", f"${study.best_value:,.0f}")
print("Best Params:\n", study.best_trial.params)

  0%|          | 0/50 [00:00<?, ?it/s]

2025/05/27 22:35:23 INFO mlflow.tracking.fluent: Experiment with name 'XGB + SMOTE Profit Tuning' does not exist. Creating a new experiment.


[I 2025-05-27 22:35:23,461] Trial 0 finished with value: 305.4 and parameters: {'n_estimators': 637, 'learning_rate': 0.14886262201211786, 'max_depth': 8, 'min_child_weight': 0.6251373574521749, 'gamma': 0.07800932022121826, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.6232334448672797, 'max_delta_step': 9}. Best is trial 0 with value: 305.4.
[I 2025-05-27 22:36:36,616] Trial 1 finished with value: -117.39999999999998 and parameters: {'n_estimators': 841, 'learning_rate': 0.034787028257996074, 'max_depth': 3, 'min_child_weight': 8.123245085588687, 'gamma': 0.41622132040021087, 'subsample': 0.6061695553391381, 'colsample_bytree': 0.6727299868828402, 'max_delta_step': 2}. Best is trial 0 with value: 305.4.
[I 2025-05-27 22:37:41,562] Trial 2 finished with value: -225.4 and parameters: {'n_estimators': 574, 'learning_rate': 0.011598924466766455, 'max_depth': 6, 'min_child_weight': 0.07476312062252301, 'gamma': 0.30592644736118974, 'subsample': 0.569746930326021, 'colsample_bytre

# Hyperparameter Tuning Validation Curves

In [5]:
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()
optuna.visualization.plot_slice(study).show()

# Load Baseline XGBoost Models from MLflow

In [6]:
mlruns_path = os.path.abspath("../mlruns")
mlflow.set_tracking_uri(f"file:..///{mlruns_path}")
client = MlflowClient()
experiment_id = "811282905201061413"
runs = client.search_runs(
    experiment_ids=[experiment_id],
    order_by=["start_time desc"]
)

xgb_smote_run = next(
run for run in runs
if run.data.tags.get("mlflow.runName", "") == "XGBoost + SMOTE"
)
xgb_smote_run_id = xgb_smote_run.info.run_id
print(f"Found run ID for XGBoost + SMOTE: {xgb_smote_run_id}")

xgb_smote_artifact_path = os.path.join(mlruns_path, experiment_id, xgb_smote_run_id, "artifacts", "XGBoost_+_SMOTE.pkl")

with open(xgb_smote_artifact_path, "rb") as f:
    xgb_smote_baseline_model = pickle.load(f)

Found run ID for XGBoost + SMOTE: efb7605b00b64fbd9512e6c3035cc794


# Final XGBoost + SMOTE Model Training & Test Set Evaluation

In [7]:
best_params = {
**study.best_trial.params,
"objective": "binary:logistic",
"eval_metric": "logloss",
"random_state": 42,
"n_jobs": -1,
"tree_method": "hist"
}

final_xgb_smote_model = xgb.XGBClassifier(**best_params)
final_xgb_smote_model.fit(X_train, Y_train)

xgb_smote_probs = final_xgb_smote_model.predict_proba(X_test)[:, 1]
xgb_smote_probs_baseline = xgb_smote_baseline_model.predict_proba(X_test)[:, 1]
taus = np.linspace(0.0, 1.0, 501)
xgb_smote_profits = [profit(Y_test, (xgb_smote_probs >= t).astype(int)) for t in taus]
xgb_smote_profits_baseline = [profit(Y_test, (xgb_smote_probs_baseline >= t).astype(int)) for t in taus]

xgb_smote_best_tau_idx = int(np.argmax(xgb_smote_profits))
xgb_smote_best_tau_idx_baseline = int(np.argmax(xgb_smote_profits_baseline))
xgb_smote_best_tau = taus[xgb_smote_best_tau_idx]
xgb_smote_best_tau_baseline = taus[xgb_smote_best_tau_idx_baseline]
xgb_smote_best_profit = xgb_smote_profits[xgb_smote_best_tau_idx]
xgb_smote_best_profit_baseline = xgb_smote_profits_baseline[xgb_smote_best_tau_idx_baseline]

print(f"Baseline XGB Optimal Threshold @ 25% FN Penalty = {xgb_smote_best_tau_baseline:.3f}, Max Profit = ${xgb_smote_best_profit_baseline:,.0f}")
print(f"Tuned XGB Optimal Threshold @ 25% FN Penalty = {xgb_smote_best_tau:.3f}, Max Profit = ${xgb_smote_best_profit:,.0f}")
improvement = xgb_smote_best_profit - xgb_smote_best_profit_baseline
print(f"Profit Improvement from Tuning = ${improvement:.0f}")

Baseline XGB Optimal Threshold @ 25% FN Penalty = 0.842, Max Profit = $538
Tuned XGB Optimal Threshold @ 25% FN Penalty = 0.030, Max Profit = $577
Profit Improvement from Tuning = $39


# Compare and Log Tuned XGB + SMOTE Model

In [8]:
y_pred_base = xgb_smote_baseline_model.predict(X_test)
y_proba_base = xgb_smote_baseline_model.predict_proba(X_test)[:, 1]

y_pred_tuned = final_xgb_smote_model.predict(X_test)
y_proba_tuned = final_xgb_smote_model.predict_proba(X_test)[:, 1]

print("\nConfusion Matrix - Baseline:")
print(confusion_matrix(Y_test, y_pred_base))
print("\nClassification Report - Baseline:")
print(classification_report(Y_test, y_pred_base))

print("\nConfusion Matrix - Tuned:")
print(confusion_matrix(Y_test, y_pred_tuned))
print("\nClassification Report - Tuned:")
print(classification_report(Y_test, y_pred_tuned))

metrics = {
"roc_auc": roc_auc_score(Y_test, y_proba_tuned),
"average_precision": average_precision_score(Y_test, y_proba_tuned),
"f1": f1_score(Y_test, y_pred_tuned),
"recall": recall_score(Y_test, y_pred_tuned),
"precision": precision_score(Y_test, y_pred_tuned)
}

confusion_matrix_path = "confusion_matrix.txt"
classification_report_path = "classification_report.txt"
with open(confusion_matrix_path, "w") as f:
    f.write(f"Confusion Matrix:\n{confusion_matrix(Y_test, y_pred_tuned)}")
with open(classification_report_path, "w") as f:
    f.write(f"Classification Report:\n{classification_report(Y_test, y_pred_tuned)}")

model_name = "Final XGB + SMOTE Tuned Model"
local_model_dir = os.path.abspath("../models")
os.makedirs(local_model_dir, exist_ok=True)
local_model_path = os.path.join(local_model_dir, f"{model_name}.pkl")
with open(local_model_path, "wb") as f:
    pickle.dump(final_xgb_smote_model, f)

mlflow_model_path = f"{model_name}.pkl"
with open(mlflow_model_path, "wb") as f:
    pickle.dump(final_xgb_smote_model, f)

with mlflow.start_run(run_name=model_name):
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(confusion_matrix_path)
    mlflow.log_artifact(classification_report_path)
    mlflow.log_artifact(mlflow_model_path)

    mlflow.sklearn.log_model(final_xgb_smote_model, artifact_path="model", registered_model_name=model_name)
mlflow.end_run()

os.remove(confusion_matrix_path)
os.remove(classification_report_path)
os.remove(mlflow_model_path)


Confusion Matrix - Baseline:
[[56834    30]
 [   14    84]]

Classification Report - Baseline:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.74      0.86      0.79        98

    accuracy                           1.00     56962
   macro avg       0.87      0.93      0.90     56962
weighted avg       1.00      1.00      1.00     56962


Confusion Matrix - Tuned:
[[56860     4]
 [   20    78]]

Classification Report - Tuned:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.95      0.80      0.87        98

    accuracy                           1.00     56962
   macro avg       0.98      0.90      0.93     56962
weighted avg       1.00      1.00      1.00     56962



Successfully registered model 'Final XGB + SMOTE Tuned Model'.
Created version '1' of model 'Final XGB + SMOTE Tuned Model'.


# Stage Tuned XGBoost + SMOTE Model

In [9]:
client = MlflowClient()
latest_version = client.get_latest_versions(model_name, stages=["None"])[0].version
client.transition_model_version_stage(
name=model_name,
version=latest_version,
stage="Staging",
archive_existing_versions=True
)
print(f"Pushed {model_name} version {latest_version} to Staging.")


``mlflow.tracking.client.MlflowClient.get_latest_versions`` is deprecated since 2.9.0. Model registry stages will be removed in a future major release. To learn more about the deprecation of model registry stages, see our migration guide here: https://mlflow.org/docs/latest/model-registry.html#migrating-from-stages



Pushed Final XGB + SMOTE Tuned Model version 1 to Staging.



``mlflow.tracking.client.MlflowClient.transition_model_version_stage`` is deprecated since 2.9.0. Model registry stages will be removed in a future major release. To learn more about the deprecation of model registry stages, see our migration guide here: https://mlflow.org/docs/latest/model-registry.html#migrating-from-stages



# Hyperparameter Tuning (XGB + SMOTE) Insights

**Goal:** 
Optimize XGBoost + SMOTE using business-aligned profit as the objective under a 25% FN-penalty, where false negatives are moderately costly.

**Approach**
- Applied Optuna with Bayesian optimization (`TPESampler`) to tune hyperparameters via stratified 5-fold CV.
- SMOTE oversampling performed inside each CV fold to avoid data leakage.
- Evaluated tuned model on test set by scanning thresholds (0 → 1) to identify profit-maximizing decision point.
- Compared against previously saved XGBoost + SMOTE baseline from MLflow.

**Key Results**
- Tuned Optimal Threshold (25% FN Penalty): **0.030**
- Max Profit (Tuned): **\$577**
- Baseline Profit (25% FN Penalty): **\$538**
- Improvement from Tuning: **\$39 gain per 56k transactions**

**Model Evaluation**
- Tuned model reduced false positives by over 85% (from 30 → 4) while maintaining high recall.
- Precision, F1, and macro-averaged metrics improved, indicating better balance.
- Confusion matrix and classification report confirm stronger fraud detection with fewer incorrect alerts.

**MLOps**
- Tuned model saved locally and logged via MLflow.
- Metrics, confusion matrix, and classification report were tracked as artifacts.
- Final model registered and promoted to **Staging** in MLflow Model Registry.