# Import Libraries

In [1]:
import os
import optuna
import pickle
import mlflow
import joblib
import numpy as np
import mlflow.sklearn
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
from mlflow.tracking import MlflowClient
from imblearn.over_sampling import SMOTE
from optuna.integration.mlflow import MLflowCallback
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, f1_score, recall_score, precision_score

# Load Data

In [2]:
X_train, X_test, Y_train, Y_test = joblib.load("../data/processed/split_data.pkl")

# Create New MLflow Experiment

In [3]:
mlruns_path = os.path.abspath("../mlruns")
mlflow.set_tracking_uri(f"file:..///{mlruns_path}")
mlflc = MLflowCallback(
    tracking_uri=mlflow.get_tracking_uri(),
    metric_name="profit"
)

study = optuna.create_study(
    study_name="XGB Profit Tuning",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
    storage="sqlite:///../mlruns/xgb_profit_tuning.db",
    load_if_exists=True
)

  mlflc = MLflowCallback(
[I 2025-05-27 16:51:51,018] A new study created in RDB with name: XGB Profit Tuning


# Hyperparameter Tuning XGBoost Model (Bayesian Search)

In [4]:
AVG_FRAUD, TP_FEE, FP_COST = 120, 0.10, -5
FN_PEN_TUNE = -0.60

X_dev, X_val, y_dev, y_val = train_test_split(
X_train, Y_train, test_size=0.15, stratify=Y_train, random_state=42
)
y_dev_np = y_dev.to_numpy()

def profit(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return TP_FEE * tp * AVG_FRAUD + FP_COST * fp + FN_PEN_TUNE * fn * AVG_FRAUD

def objective(trial):
    params = {
    "n_estimators" : trial.suggest_int("n_estimators", 600, 900),
    "learning_rate" : trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
    "max_depth" : trial.suggest_int("max_depth", 4, 8),
    "min_child_weight" : trial.suggest_float("min_child_weight", 0.1, 5.0, log=True),
    "gamma" : trial.suggest_float("gamma", 0.0, 0.4),
    "subsample" : trial.suggest_float("subsample", 0.7, 1.0),
    "colsample_bytree" : trial.suggest_float("colsample_bytree", 0.6, 1.0),
    "scale_pos_weight" : trial.suggest_float("scale_pos_weight", 5, 40),
    "max_delta_step" : trial.suggest_int("max_delta_step", 0, 10),
    "objective" : "binary:logistic",
    "eval_metric" : "logloss",
    "random_state" : 42,
    "n_jobs" : -1,
    "tree_method" : "hist"
    }

    model = xgb.XGBClassifier(**params)
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = []

    for train_idx, val_idx in cv.split(X_dev, y_dev_np):
        model.fit(X_dev.iloc[train_idx], y_dev_np[train_idx])
        preds = (model.predict_proba(X_dev.iloc[val_idx])[:, 1] >= 0.5).astype(int)
        score = profit(y_dev_np[val_idx], preds)
        scores.append(score)

    return np.mean(scores)

study.optimize(objective, n_trials=50, timeout=5000, callbacks=[mlflc], show_progress_bar=True)

print("\nBest Cross-Validated Profit:", f"${study.best_value:,.0f}")
print("Best Params:\n", study.best_trial.params)
mlflow.end_run()

  0%|          | 0/50 [00:00<?, ?it/s]

2025/05/27 16:54:41 INFO mlflow.tracking.fluent: Experiment with name 'XGB Profit Tuning' does not exist. Creating a new experiment.


[I 2025-05-27 16:54:41,159] Trial 0 finished with value: -259.59999999999985 and parameters: {'n_estimators': 712, 'learning_rate': 0.17254716573280354, 'max_depth': 7, 'min_child_weight': 1.0401663679887312, 'gamma': 0.06240745617697461, 'subsample': 0.7467983561008608, 'colsample_bytree': 0.6232334448672797, 'scale_pos_weight': 35.31616510212273, 'max_delta_step': 6}. Best is trial 0 with value: -259.59999999999985.
[I 2025-05-27 16:55:28,005] Trial 1 finished with value: -279.3999999999999 and parameters: {'n_estimators': 813, 'learning_rate': 0.010636066512540286, 'max_depth': 8, 'min_child_weight': 2.595942550311263, 'gamma': 0.08493564427131046, 'subsample': 0.7545474901621302, 'colsample_bytree': 0.6733618039413735, 'scale_pos_weight': 15.64847850358382, 'max_delta_step': 5}. Best is trial 0 with value: -259.59999999999985.
[I 2025-05-27 16:56:11,186] Trial 2 finished with value: -325.79999999999995 and parameters: {'n_estimators': 730, 'learning_rate': 0.023927528765580644, 'ma

# Hyperparameter Tuning Validation Curves

In [9]:
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()
optuna.visualization.plot_slice(study).show()

# Load Baseline XGBoost Models from MLflow

In [5]:
mlruns_path = os.path.abspath("../mlruns")
mlflow.set_tracking_uri(f"file:..///{mlruns_path}")
client = MlflowClient()
experiment_id = "811282905201061413"
runs = client.search_runs(
    experiment_ids=[experiment_id],
    order_by=["start_time desc"]
)

xgb_run = next(
run for run in runs
if run.data.tags.get("mlflow.runName", "") == "XGBoost"
)
xgb_run_id = xgb_run.info.run_id
print(f"Found run ID for XGBoost: {xgb_run_id}")

xgb_artifact_path = os.path.join(mlruns_path, experiment_id, xgb_run_id, "artifacts", "XGBoost.pkl")

with open(xgb_artifact_path, "rb") as f:
    xgb_baseline_model = pickle.load(f)

Found run ID for XGBoost: 0ec9e0741ccf48a38cbc71a2df61a030


# Final XGBoost Model Training & Test Set Evaluation

In [6]:
best_params = {
**study.best_trial.params,
"objective": "binary:logistic",
"eval_metric": "logloss",
"random_state": 42,
"n_jobs": -1,
"tree_method": "hist"
}

final_xgb_model = xgb.XGBClassifier(**best_params)
final_xgb_model.fit(X_train, Y_train)

xgb_probs = final_xgb_model.predict_proba(X_test)[:, 1]
xgb_probs_baseline = xgb_baseline_model.predict_proba(X_test)[:, 1]
taus = np.linspace(0.0, 1.0, 501)
xgb_profits = [profit(Y_test, (xgb_probs >= t).astype(int)) for t in taus]
xgb_profits_baseline = [profit(Y_test, (xgb_probs_baseline >= t).astype(int)) for t in taus]

xgb_best_tau_idx = int(np.argmax(xgb_profits))
xgb_best_tau_idx_baseline = int(np.argmax(xgb_profits_baseline))
xgb_best_tau = taus[xgb_best_tau_idx]
xgb_best_tau_baseline = taus[xgb_best_tau_idx_baseline]
xgb_best_profit = xgb_profits[xgb_best_tau_idx]
xgb_best_profit_baseline = xgb_profits_baseline[xgb_best_tau_idx_baseline]

print(f"Baseline XGB Optimal Threshold @ 60% FN Penalty = {xgb_best_tau_baseline:.3f}, Max Profit = ${xgb_best_profit_baseline:,.0f}")
print(f"Tuned XGB Optimal Threshold @ 60% FN Penalty = {xgb_best_tau:.3f}, Max Profit = ${xgb_best_profit:,.0f}")
improvement = xgb_best_profit - xgb_best_profit_baseline
print(f"Profit Improvement from Tuning = ${improvement:.0f}")


Baseline XGB Optimal Threshold @ 60% FN Penalty = 0.064, Max Profit = $-26
Tuned XGB Optimal Threshold @ 60% FN Penalty = 0.250, Max Profit = $73
Profit Improvement from Tuning = $99


# Compare and Log Tuned XGB Model

In [7]:
y_pred_base = xgb_baseline_model.predict(X_test)
y_proba_base = xgb_baseline_model.predict_proba(X_test)[:, 1]

y_pred_tuned = final_xgb_model.predict(X_test)
y_proba_tuned = final_xgb_model.predict_proba(X_test)[:, 1]

print("\nConfusion Matrix - Baseline:")
print(confusion_matrix(Y_test, y_pred_base))
print("\nClassification Report - Baseline:")
print(classification_report(Y_test, y_pred_base))

print("\nConfusion Matrix - Tuned:")
print(confusion_matrix(Y_test, y_pred_tuned))
print("\nClassification Report - Tuned:")
print(classification_report(Y_test, y_pred_tuned))

metrics = {
"roc_auc": roc_auc_score(Y_test, y_proba_tuned),
"average_precision": average_precision_score(Y_test, y_proba_tuned),
"f1": f1_score(Y_test, y_pred_tuned),
"recall": recall_score(Y_test, y_pred_tuned),
"precision": precision_score(Y_test, y_pred_tuned)
}

confusion_matrix_path = "confusion_matrix.txt"
classification_report_path = "classification_report.txt"
with open(confusion_matrix_path, "w") as f:
    f.write(f"Confusion Matrix:\n{confusion_matrix(Y_test, y_pred_tuned)}")
with open(classification_report_path, "w") as f:
    f.write(f"Classification Report:\n{classification_report(Y_test, y_pred_tuned)}")

model_name = "XGB_Tuned_Model"
local_model_dir = os.path.abspath("../models")
os.makedirs(local_model_dir, exist_ok=True)
local_model_path = os.path.join(local_model_dir, f"{model_name}.pkl")
with open(local_model_path, "wb") as f:
    pickle.dump(final_xgb_model, f)

mlflow_model_path = f"{model_name}.pkl"
with open(mlflow_model_path, "wb") as f:
    pickle.dump(final_xgb_model, f)

with mlflow.start_run(run_name=model_name):
    mlflow.log_metrics(metrics)
    mlflow.log_artifact(confusion_matrix_path)
    mlflow.log_artifact(classification_report_path)
    mlflow.log_artifact(mlflow_model_path)

    mlflow.sklearn.log_model(final_xgb_model, artifact_path="model", registered_model_name=model_name)
mlflow.end_run()

os.remove(confusion_matrix_path)
os.remove(classification_report_path)
os.remove(mlflow_model_path)


Confusion Matrix - Baseline:
[[56854    10]
 [   16    82]]

Classification Report - Baseline:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.89      0.84      0.86        98

    accuracy                           1.00     56962
   macro avg       0.95      0.92      0.93     56962
weighted avg       1.00      1.00      1.00     56962


Confusion Matrix - Tuned:
[[56851    13]
 [   17    81]]

Classification Report - Tuned:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.86      0.83      0.84        98

    accuracy                           1.00     56962
   macro avg       0.93      0.91      0.92     56962
weighted avg       1.00      1.00      1.00     56962



Successfully registered model 'XGB_Tuned_Model'.
Created version '1' of model 'XGB_Tuned_Model'.


# Stage Tuned XGBoost Model

In [8]:
client = MlflowClient()
latest_version = client.get_latest_versions(model_name, stages=["None"])[0].version
client.transition_model_version_stage(
name=model_name,
version=latest_version,
stage="Staging",
archive_existing_versions=True
)
print(f"Pushed {model_name} version {latest_version} to Staging.")

Pushed XGB_Tuned_Model version 1 to Staging.


  latest_version = client.get_latest_versions(model_name, stages=["None"])[0].version
  client.transition_model_version_stage(


# Hyperparameter Tuning (XGB) Insights

**Goal:** 
Optimize XGBoost using business-aligned profit as the objective under a 60% FN-penalty assumption to reflect costly fraud misses.

**Approach**
- Applied Optuna with Bayesian optimization (`TPESampler`) to tune hyperparameters on a stratified 5-fold CV.
- Tuned model evaluated on test set by simulating thresholds from 0 → 1 to find the profit-maximizing threshold.
- Compared to a previously logged baseline XGBoost model retrieved from MLflow.

**Key Results**
- Tuned XGB Optimal Threshold (60% FN Penalty): **0.250**
- Max Profit (Tuned): **\$73**
- Baseline XGB Profit (60% FN Penalty): **-\$26**
- Improvement from Tuning: **\$99 gain per 56k transactions**

**Model Evaluation**
- Improved recall at a moderate cost to precision.
- ROC-AUC and PR-AUC also slightly improved over the baseline.
- Confusion matrix and classification report show fewer false negatives with the tuned model.

**MLOps**
- Tuned model logged and registered via MLflow.
- Metrics, confusion matrix, and classification report saved as tracked artifacts.
- Model version pushed to **Staging** in MLflow Registry.