In [4]:
import pandas as pd
import numpy as np
import joblib
import optuna
import mlflow
import mlflow.xgboost
from pathlib import Path
from xgboost import XGBClassifier
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    roc_curve,
    roc_auc_score,
    precision_recall_curve,
    auc,
    recall_score,
    fbeta_score, accuracy_score,
    f1_score
)

pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', None)     # Show all rows

In [2]:
import joblib
from joblib import load

X_train, y_train, X_test, y_test = joblib.load('smote_dataset_splits.joblib')

In [None]:
def objective(trial):
    """Optuna objective function for XGBClassifier."""
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 1000),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "gamma": trial.suggest_float("gamma", 0.0, 5.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 1, 10), # Useful even with SMOTE
        "random_state": 42,
        "n_jobs": -1,
        "use_label_encoder": False,
        "eval_metric": "logloss"
    }

    # Start a nested MLflow run for each trial
    with mlflow.start_run(nested=True):
        model = XGBClassifier(**params)
        model.fit(X_train, y_train)

        # Predictions
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1]

        # Classification Metrics
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_prob)
        precision, recall, _ = precision_recall_curve(y_test, y_prob)
        pr_auc = auc(recall, precision)
        f2 = fbeta_score(y_test, y_pred, beta=2, pos_label=1)

        # Log to MLflow
        mlflow.log_params(params)
        mlflow.log_metrics({
            "accuracy": acc,
            "f1_score": f1,
            "roc_auc": roc_auc,
            "pr_auc": pr_auc,
            "f2_score": f2
        })

    # We maximize F1-score because of the fraud imbalance
    return f1

In [20]:
# 1. Setup MLflow Tracking
# Ensure this path matches your local project structure
mlflow.set_tracking_uri(r"file:///C:/Users/user/Desktop/ML & DL projects/Anti- Money Laundering classification/mlruns")
mlflow.set_experiment("AML_XGBoost_Optuna_Notebook")

# 2. Run the Optuna Study
with mlflow.start_run(run_name="XGB_Hyperparameter_Tuning"):
    print("Starting optimization...")
    
    # We use direction="maximize" because we want to maximize the F1-score
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=15)
    
    # 3. Retrieve Best Results
    print("\nBest params:", study.best_trial.params)
    best_params = study.best_trial.params
    
    # 4. Train Final Model with Best Parameters
    print("\nTraining final model with best parameters...")
    best_model = XGBClassifier(**best_params, random_state=42, use_label_encoder=False, eval_metric="logloss")
    best_model.fit(X_train, y_train)
    
    # 5. Final Evaluation
    y_pred = best_model.predict(X_test)
    y_prob = best_model.predict_proba(X_test)[:, 1]
    
    metrics = {
        "final_accuracy": accuracy_score(y_test, y_pred),
        "final_f1": f1_score(y_test, y_pred),
        "final_recall": recall_score(y_test, y_pred),
        "final_roc_auc": roc_auc_score(y_test, y_prob)
    }
    
    # 6. Log Best Model and Metrics to the Parent Run
    mlflow.log_params(best_params)
    mlflow.log_metrics(metrics)
    mlflow.xgboost.log_model(best_model, artifact_path="best_model")
    
    print("\n--- Final Model Performance ---")
    for name, value in metrics.items():
        print(f"{name}: {value:.4f}")
        
    print("\nDetailed Classification Report:")
    print(classification_report(y_test, y_pred))



[I 2026-01-28 15:46:56,724] A new study created in memory with name: no-name-b98aa233-bcd4-453b-b1ea-98c1a2113e36


Starting optimization...


[I 2026-01-28 15:46:57,549] Trial 0 finished with value: 0.41963509991311904 and parameters: {'n_estimators': 673, 'max_depth': 8, 'learning_rate': 0.06367818016075064, 'subsample': 0.6515684031163799, 'colsample_bytree': 0.9500054096484398, 'min_child_weight': 7, 'gamma': 1.8099738884228622, 'reg_alpha': 0.16816672962224202, 'reg_lambda': 0.015174400123502999, 'scale_pos_weight': 8.400634833603963}. Best is trial 0 with value: 0.41963509991311904.
[I 2026-01-28 15:46:58,160] Trial 1 finished with value: 0.4015594541910331 and parameters: {'n_estimators': 809, 'max_depth': 3, 'learning_rate': 0.012863738386500461, 'subsample': 0.6294882925188217, 'colsample_bytree': 0.6432320388600985, 'min_child_weight': 7, 'gamma': 1.5107480999108114, 'reg_alpha': 2.6131806647170496e-07, 'reg_lambda': 3.704790550472849e-08, 'scale_pos_weight': 9.601114267621666}. Best is trial 0 with value: 0.41963509991311904.
[I 2026-01-28 15:46:58,725] Trial 2 finished with value: 0.46270423869287236 and parameter


Best params: {'n_estimators': 407, 'max_depth': 6, 'learning_rate': 0.2618334741789985, 'subsample': 0.505834406353306, 'colsample_bytree': 0.6127855365718692, 'min_child_weight': 10, 'gamma': 4.24376805683629, 'reg_alpha': 0.00016735055353851895, 'reg_lambda': 6.950488982830555, 'scale_pos_weight': 4.143270205332353}

Training final model with best parameters...





--- Final Model Performance ---
final_accuracy: 0.8370
final_f1: 0.5065
final_recall: 0.9208
final_roc_auc: 0.9484

Detailed Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.83      0.90     10355
           1       0.35      0.92      0.51      1035

    accuracy                           0.84     11390
   macro avg       0.67      0.87      0.70     11390
weighted avg       0.93      0.84      0.87     11390



In [21]:
# Save the model locally for convenience
joblib.dump(best_model, "best_xgb_classifier.joblib")

['best_xgb_classifier.joblib']