### Optimizing based on the main.ipynb
- Adding hyperparameter tuning.
- Changing ROC-AUC to PR-AUC due to imbalanced data.
- Tuning threshold to determine better cut off.
- Optimizing F2-score instead of F1-score because in this case, recall is more important than precision.
- Changing KFold to StratifiedKFold to keep the ratio of classes in each fold.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    average_precision_score, precision_recall_curve, classification_report,
    confusion_matrix, fbeta_score
)
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import optuna
from optuna.samplers import TPESampler
import warnings
warnings.filterwarnings('ignore')
optuna.logging.set_verbosity(optuna.logging.WARNING)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
THRESHOLD = 0.3  
N_FOLDS = 5
RANDOM_STATE = 42
USE_SMOTE = True  # Handle imbalanced data

# MLflow setup
mlflow.set_tracking_uri('sqlite:///mlflow.db')
mlflow.set_experiment("Diabetes Prediction - Stacking PR-AUC")

# SMOTE config
smote = SMOTE(random_state=RANDOM_STATE, sampling_strategy='minority')

2026/02/02 20:13:20 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/02/02 20:13:20 INFO mlflow.store.db.utils: Updating database tables
2026/02/02 20:13:20 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/02 20:13:20 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/02/02 20:13:20 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/02 20:13:20 INFO alembic.runtime.migration: Will assume non-transactional DDL.


In [3]:
# Read data
df1 = pd.read_csv('../data/db1.csv')
df2 = pd.read_csv('../data/db2.csv')

df1['Diabetes_binary'] = df1['Diabetes_binary'].replace({2: 1})
df = pd.concat([df1, df2], ignore_index=True)

X = df.drop('Diabetes_binary', axis=1)
y = df['Diabetes_binary']

print(f"Dataset shape: {X.shape}")
print(f"Class distribution:\n{y.value_counts(normalize=True)}")

Dataset shape: (324372, 21)
Class distribution:
Diabetes_binary
0.0    0.767788
1.0    0.232212
Name: proportion, dtype: float64


In [4]:
def predict_with_threshold(model, X, threshold=0.5):
    """
    Predict with custom threshold
        return lable, probability
    """
    proba = model.predict_proba(X)[:, 1]
    return (proba >= threshold).astype(int), proba

def calculate_metrics(y_true, y_pred, y_proba):
    """
    Calculate metrics - Acc, Precision, Recall, F1, PR-AUC
    """
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1': f1_score(y_true, y_pred, zero_division=0),
        'pr_auc': average_precision_score(y_true, y_proba),  # PR-AUC
    }

def find_optimal_threshold(y_true, y_proba, target_recall=0.7):
    """Find optimal threshold to achieve target recall"""
    precision, recall, thresholds = precision_recall_curve(y_true, y_proba)
    
    # Threshold at recall >= target_recall and highest precision
    valid_idx = np.where(recall[:-1] >= target_recall)[0]
    if len(valid_idx) == 0:
        return 0.5
    
    best_idx = valid_idx[np.argmax(precision[:-1][valid_idx])]
    return thresholds[best_idx]

In [7]:
N_TRIALS_BASE = 30

# Apply the first KFold and SMOTE 
def KFold_SMOTE_Pred_for_base_model(model_name,hyperparams = {}):
    skf_tune = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)
    scores = []
    
    for train_idx, val_idx in skf_tune.split(X, y):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        if USE_SMOTE:
            X_train, y_train = smote.fit_resample(X_train, y_train)
        
        model = model_name(**hyperparams)
        model.fit(X_train, y_train)
        
        proba = model.predict_proba(X_val)[:, 1]
        pred = (proba >= THRESHOLD).astype(int)
        f2 = fbeta_score(y_val, pred, beta=2)
        scores.append(f2)
    
    return np.mean(scores)

# Create hyperparams tuning
def objective_xgb(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'random_state': RANDOM_STATE,
        'eval_metric': 'aucpr',     #PR-AUC
        'use_label_encoder': False,
    }
    xgb_score = KFold_SMOTE_Pred_for_base_model(XGBClassifier, params)
    
    return xgb_score

def objective_lgbm(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'random_state': RANDOM_STATE,
        'verbose': -1,
    }
    lgbm_score = KFold_SMOTE_Pred_for_base_model(LGBMClassifier, params)

    return lgbm_score

    
def objective_catboost(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 50, 300),
        'depth': trial.suggest_int('depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 10),
        'random_strength': trial.suggest_float('random_strength', 0, 10),
        'random_state': RANDOM_STATE,
        'verbose': 0,
        'eval_metric': 'PRAUC', # PR-AUC
        'early_stopping_rounds': 50
    }
    cb_score = KFold_SMOTE_Pred_for_base_model(CatBoostClassifier, params)
    
    return cb_score


In [10]:
def study_model(objective_model, model_name: str):
    print(f"Tuning {model_name}:")
    model = optuna.create_study(direction = 'maximize', sampler = TPESampler(seed=RANDOM_STATE))
    model.optimize(objective_model, n_trials=N_TRIALS_BASE, show_progress_bar=True)
    best_model_params = model.best_params
    print(f"{model_name} best F2: {model.best_value:.4f}")
    return best_model_params

best_xgb_params= study_model(objective_xgb,"XGBoost")
best_lgbm_params = study_model(objective_lgbm,"LightGBM")
best_catboost_params = study_model(objective_catboost,"CatBoost")

# Summary 
print(f"\nXGBoost: {best_xgb_params}")
print(f"\nLightGBM: {best_lgbm_params}")
print(f"\nCatBoost: {best_catboost_params}")

Tuning XGBoost:


Best trial: 6. Best value: 0.712255:  37%|███▋      | 11/30 [04:02<06:58, 22.04s/it]


[W 2026-02-02 20:25:40,675] Trial 11 failed with parameters: {'n_estimators': 294, 'max_depth': 8, 'learning_rate': 0.011319796787971872, 'subsample': 0.887352152856701, 'colsample_bytree': 0.8394474835577637, 'min_child_weight': 5, 'gamma': 3.262052007161211} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\tandu\.conda\envs\diabetes\lib\site-packages\optuna\study\_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\tandu\AppData\Local\Temp\ipykernel_12280\618323907.py", line 39, in objective_xgb
    xgb_score = KFold_SMOTE_Pred_for_base_model(XGBClassifier, params)
  File "C:\Users\tandu\AppData\Local\Temp\ipykernel_12280\618323907.py", line 16, in KFold_SMOTE_Pred_for_base_model
    model.fit(X_train, y_train)
  File "c:\Users\tandu\.conda\envs\diabetes\lib\site-packages\xgboost\core.py", line 774, in inner_f
    return func(**kwargs)
  File "c:\Users\tandu\.conda\envs\diabetes\lib\site-pack

KeyboardInterrupt: 

In [None]:
best_xgb_params

In [9]:
# Setup models after Optuna

base_models = {
    'xgb': XGBClassifier(
        **best_xgb_params,
        random_state=RANDOM_STATE,
        eval_metric='aucpr',
        use_label_encoder=False,
        #verbose=1,
        early_stopping_rounds = 50
    ),
    'lgbm': LGBMClassifier(
        **best_lgbm_params,
        random_state=RANDOM_STATE,
        early_stopping_rounds = 50,
        verbose=-1
    ),
    'catboost': CatBoostClassifier(
        **best_catboost_params,
        random_state=RANDOM_STATE,
        verbose=0,
        early_stopping_rounds = 50,
        eval_metric='PRAUC'
    )
}

NameError: name 'best_xgb_params' is not defined

In [None]:
from sklearn.base import clone

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

# Storage cho meta features
oof_predictions = {name: np.zeros(len(X)) for name in base_models}
fold_metrics = {name: [] for name in base_models}

print("=" * 60)
print(f"STACKING WITH PR-AUC (Threshold = {THRESHOLD})")
print(f"SMOTE: {'Enabled' if USE_SMOTE else 'Disabled'}")
print("=" * 60)

with mlflow.start_run(run_name="stacking_kfold_prauc"):
    mlflow.log_param("n_folds", N_FOLDS)
    mlflow.log_param("threshold", THRESHOLD)
    mlflow.log_param("metric", "PR-AUC")
    mlflow.log_param("use_smote", USE_SMOTE)
    
    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f"\n--- Fold {fold + 1}/{N_FOLDS} ---")
        
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        # Apply SMOTE only on training data
        if USE_SMOTE:
            X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
            print(f"  SMOTE: {len(y_train)} → {len(y_train_resampled)} samples")
        else:
            X_train_resampled, y_train_resampled = X_train, y_train
        
        for name, model in base_models.items():
            model_clone = clone(model)
            model_clone.fit(X_train_resampled, y_train_resampled)
            
            # Predict on ORIGINAL validation set
            y_pred, y_proba = predict_with_threshold(model_clone, X_val, THRESHOLD)
            
            # Save OOF predictions (probabilities)
            oof_predictions[name][val_idx] = y_proba
            
            # Calculate metrics
            metrics = calculate_metrics(y_val, y_pred, y_proba)
            fold_metrics[name].append(metrics)
            
            print(f"  {name}: PR-AUC={metrics['pr_auc']:.4f}, "
                  f"Recall={metrics['recall']:.4f}, Precision={metrics['precision']:.4f}")
    
    # Log average metrics per model
    print("\n" + "=" * 60)
    print("AVERAGE METRICS (K-Fold)")
    print("=" * 60)
    
    for name in base_models:
        avg_metrics = {
            k: np.mean([m[k] for m in fold_metrics[name]]) 
            for k in fold_metrics[name][0]
        }
        print(f"\n{name.upper()}:")
        for k, v in avg_metrics.items():
            print(f"  {k}: {v:.4f}")
            mlflow.log_metric(f"{name}_{k}", v)

STACKING WITH PR-AUC (Threshold = 0.3)
SMOTE: Enabled

--- Fold 1/5 ---
  SMOTE: 259497 → 398478 samples
  xgb: PR-AUC=0.5802, Recall=0.8945, Precision=0.3952
  lgbm: PR-AUC=0.5718, Recall=0.8907, Precision=0.3890
  catboost: PR-AUC=0.5724, Recall=0.9101, Precision=0.3782

--- Fold 2/5 ---
  SMOTE: 259497 → 398478 samples
  xgb: PR-AUC=0.5675, Recall=0.8947, Precision=0.3951
  lgbm: PR-AUC=0.5588, Recall=0.8927, Precision=0.3881
  catboost: PR-AUC=0.5588, Recall=0.9105, Precision=0.3771

--- Fold 3/5 ---
  SMOTE: 259498 → 398478 samples
  xgb: PR-AUC=0.5718, Recall=0.8897, Precision=0.3970
  lgbm: PR-AUC=0.5647, Recall=0.8866, Precision=0.3905
  catboost: PR-AUC=0.5632, Recall=0.9051, Precision=0.3776

--- Fold 4/5 ---
  SMOTE: 259498 → 398478 samples
  xgb: PR-AUC=0.5748, Recall=0.8941, Precision=0.3950
  lgbm: PR-AUC=0.5681, Recall=0.8937, Precision=0.3892
  catboost: PR-AUC=0.5686, Recall=0.9100, Precision=0.3780

--- Fold 5/5 ---
  SMOTE: 259498 → 398480 samples
  xgb: PR-AUC=0.571

In [None]:
print("\n" + "=" * 60)
print("OPTUNA HYPERPARAMETER TUNING FOR META LEARNER")
print("=" * 60)

# Create meta features from OOF predictions
meta_features = np.column_stack([oof_predictions[name] for name in base_models])
meta_features_df = pd.DataFrame(
    meta_features, 
    columns=['xgb_pred', 'lgbm_pred', 'catboost_pred']
)
meta_features_df['true_label'] = y.values

print(f"Meta features shape: {meta_features.shape}")

def objective(trial):
    """Optuna objective function for meta learner tuning"""
    
    # Hyperparameters to tune
    params = {
        'C': trial.suggest_float('C', 0.001, 100, log=True),
        'penalty': trial.suggest_categorical('penalty', ['l1', 'l2']),
        'solver': 'saga',  # Supports both l1 and l2
        'max_iter': 2000,
        'random_state': RANDOM_STATE,
    }
    
    # Threshold tuning
    threshold = trial.suggest_float('threshold', 0.2, 0.5)
    
    # Cross-validation for meta learner
    skf_meta = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)
    scores = []
    
    for train_idx, val_idx in skf_meta.split(meta_features, y):
        X_train_meta = meta_features[train_idx]
        X_val_meta = meta_features[val_idx]
        y_train_meta = y.iloc[train_idx]
        y_val_meta = y.iloc[val_idx]
        
        model = LogisticRegression(**params)
        model.fit(X_train_meta, y_train_meta)
        
        # Predict with threshold
        proba = model.predict_proba(X_val_meta)[:, 1]
        pred = (proba >= threshold).astype(int)
        
        # Optimize F2-score (recall more important)
        f2 = fbeta_score(y_val_meta, pred, beta=2)
        scores.append(f2)
    
    return np.mean(scores)

# Run Optuna study
study = optuna.create_study(
    direction='maximize',
    sampler=TPESampler(seed=RANDOM_STATE),
    study_name='meta_learner_tuning'
)

study.optimize(objective, n_trials=50, show_progress_bar=True)

# Best parameters
best_params = study.best_params
best_threshold = best_params.pop('threshold')

print(f"\n✅ Best F2-Score: {study.best_value:.4f}")
print(f"Best Threshold: {best_threshold:.3f}")
print(f"Best Params: {best_params}")


OPTUNA HYPERPARAMETER TUNING FOR META LEARNER
Meta features shape: (324372, 3)


Best trial: 25. Best value: 0.690257: 100%|██████████| 50/50 [02:53<00:00,  3.47s/it]


✅ Best F2-Score: 0.6903
Best Threshold: 0.201
Best Params: {'C': 1.973072411270085, 'penalty': 'l2'}





In [None]:
# Use best params from Optuna
meta_learner_params = {
    'C': best_params['C'],
    'penalty': best_params['penalty'],
    'solver': 'saga',
    'max_iter': 2000,
    'random_state': RANDOM_STATE,
}

print(f"Using params: {meta_learner_params}")
print(f"Using threshold: {best_threshold:.3f}")

# Train meta learner
meta_learner = LogisticRegression(**meta_learner_params)
meta_learner.fit(meta_features, y)

# Predict với best threshold
meta_proba = meta_learner.predict_proba(meta_features)[:, 1]
meta_pred = (meta_proba >= best_threshold).astype(int)

# Tính metrics cho meta learner
meta_metrics = calculate_metrics(y, meta_pred, meta_proba)
meta_metrics['f2'] = fbeta_score(y, meta_pred, beta=2)

print(f"\nMETA LEARNER RESULTS (Threshold = {best_threshold:.3f}):")
for k, v in meta_metrics.items():
    print(f"  {k}: {v:.4f}")


TRAINING META LEARNER WITH BEST PARAMS
Using params: {'C': 1.973072411270085, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 2000, 'random_state': 42}
Using threshold: 0.201

META LEARNER RESULTS (Threshold = 0.201):
  accuracy: 0.7287
  precision: 0.4522
  recall: 0.7950
  f1: 0.5765
  pr_auc: 0.5736
  f2: 0.6903


In [None]:
import os
os.makedirs("../data", exist_ok=True)

with mlflow.start_run(run_name="stacking_full_data_optuna"):
    # Log params
    mlflow.log_param("training_type", "full_data")
    mlflow.log_param("threshold", best_threshold)
    mlflow.log_param("metric", "PR-AUC")
    mlflow.log_param("meta_learner", "LogisticRegression")
    mlflow.log_param("base_models", list(base_models.keys()))
    mlflow.log_param("optuna_best_f2_meta", study.best_value)
    mlflow.log_param("optuna_best_f2_xgb", study_xgb.best_value)
    mlflow.log_param("optuna_best_f2_lgbm", study_lgbm.best_value)
    mlflow.log_param("optuna_best_f2_catboost", study_catboost.best_value)
    mlflow.log_param("n_samples", len(X))
    mlflow.log_param("n_features", X.shape[1])
    mlflow.log_param("use_smote", USE_SMOTE)
    
    # Log best params
    for k, v in best_xgb_params.items():
        mlflow.log_param(f"xgb_{k}", v)
    for k, v in best_lgbm_params.items():
        mlflow.log_param(f"lgbm_{k}", v)
    for k, v in best_catboost_params.items():
        mlflow.log_param(f"catboost_{k}", v)
    for k, v in best_params.items():
        mlflow.log_param(f"meta_{k}", v)
    
    # Apply SMOTE
    if USE_SMOTE:
        X_res, y_res = smote.fit_resample(X, y)
        print(f"SMOTE applied: {len(y)} → {len(y_res)} samples")
        print(f"Class distribution after SMOTE:\n{pd.Series(y_res).value_counts()}")
    else:
        X_res, y_res = X, y
    
    # Train final base models on SMOTE data
    final_base_models = {}
    final_predictions = {}
    
    for name, model in base_models.items():
        final_model = clone(model)
        final_model.fit(X_res, y_res)
        final_base_models[name] = final_model
        
        # Get predictions on ORIGINAL data (for evaluation)
        proba = final_model.predict_proba(X)[:, 1]
        final_predictions[name] = proba
        
        # Log base model
        mlflow.sklearn.log_model(final_model, f"base_model_{name}")
    
    # Create final meta features (from original data predictions)
    final_meta_features = np.column_stack([final_predictions[name] for name in base_models])
    
    # Train final meta learner with best params
    final_meta_learner = LogisticRegression(**meta_learner_params)
    final_meta_learner.fit(final_meta_features, y)
    
    # Final predictions với best threshold
    final_proba = final_meta_learner.predict_proba(final_meta_features)[:, 1]
    final_pred = (final_proba >= best_threshold).astype(int)
    
    # Calculate final metrics
    final_metrics = calculate_metrics(y, final_pred, final_proba)
    final_metrics['f2'] = fbeta_score(y, final_pred, beta=2)
    
    print(f"\nFINAL STACKING RESULTS (Threshold = {best_threshold:.3f}):")
    for k, v in final_metrics.items():
        print(f"  {k}: {v:.4f}")
        mlflow.log_metric(f"overall_{k}", v)
    
    # Log confusion matrix
    cm = confusion_matrix(y, final_pred)
    print(f"\nConfusion Matrix:\n{cm}")
    
    # Log classification report
    print("\nClassification Report:")
    print(classification_report(y, final_pred))
    
    # Log artifacts    
    # Raw data
    df.to_csv("../data/data_full.csv", index=False)
    mlflow.log_artifact("../data/data_full.csv", name = 'data_full')
    
    # Feature names
    feature_names = pd.DataFrame({'feature_name': X.columns.tolist()})
    feature_names.to_csv("../data/feature_names.csv", index=False)
    mlflow.log_artifact("../data/feature_names.csv", name = 'feature_names')
    
    # Meta features
    meta_features_df.to_csv("../data/meta_features.csv", index=False)
    mlflow.log_artifact("../data/meta_features.csv", name = 'meta_features')
    
    # Optuna results for all models
    optuna_results_meta = study.trials_dataframe()
    optuna_results_meta['model'] = 'meta_learner'
    optuna_results_xgb = study_xgb.trials_dataframe()
    optuna_results_xgb['model'] = 'xgb'
    optuna_results_lgbm = study_lgbm.trials_dataframe()
    optuna_results_lgbm['model'] = 'lgbm'
    optuna_results_catboost = study_catboost.trials_dataframe()
    optuna_results_catboost['model'] = 'catboost'
    
    all_optuna_results = pd.concat([
        optuna_results_xgb, optuna_results_lgbm, 
        optuna_results_catboost, optuna_results_meta
    ], ignore_index=True)
    all_optuna_results.to_csv("../data/optuna_results.csv", index=False)
    mlflow.log_artifact("../data/optuna_results.csv", name = 'optuna_res')
    
    # 5. Log config/threshold info
    config_info = {
        'threshold': best_threshold,
        'n_folds': N_FOLDS,
        'random_state': RANDOM_STATE,
        'use_smote': USE_SMOTE,
        'meta_learner_params': meta_learner_params,
        'xgb_params': best_xgb_params,
        'lgbm_params': best_lgbm_params,
        'catboost_params': best_catboost_params,
        'base_models': list(base_models.keys()),
    }
    pd.DataFrame([config_info]).to_json("../data/config.json", orient='records', indent=2)
    mlflow.log_artifact("../data/config.json", name = 'config')
    
    # Log meta learner w/ signature
    signature = infer_signature(final_meta_features, final_meta_learner.predict(final_meta_features))
    mlflow.sklearn.log_model(
        final_meta_learner, 
        "meta_learner_final",
        signature=signature
    )
    
    print("All models and artifacts logged")


FINAL TRAINING ON FULL DATA
SMOTE applied: 324372 → 498098 samples
Class distribution after SMOTE:
Diabetes_binary
0.0    249049
1.0    249049
Name: count, dtype: int64





FINAL STACKING RESULTS (Threshold = 0.201):
  accuracy: 0.7355
  precision: 0.4600
  recall: 0.8004
  f1: 0.5843
  pr_auc: 0.5858
  f2: 0.6972

Confusion Matrix:
[[178291  70758]
 [ 15037  60286]]

Classification Report:
              precision    recall  f1-score   support

         0.0       0.92      0.72      0.81    249049
         1.0       0.46      0.80      0.58     75323

    accuracy                           0.74    324372
   macro avg       0.69      0.76      0.70    324372
weighted avg       0.81      0.74      0.75    324372






✅ All models and artifacts logged successfully!
   Saved to ../data/:
   - data_full.csv
   - feature_names.csv
   - meta_features.csv
   - optuna_results.csv (all 4 models)
   - config.json
   MLflow models:
   - base_model_xgb, base_model_lgbm, base_model_catboost
   - meta_learner_final


In [None]:
thresholds_to_compare = [_ for _ in range(0.2,0.7,0.05)]

results = []
for thresh in thresholds_to_compare:
    proba = final_meta_learner.predict_proba(final_meta_features)[:, 1]
    pred = (proba >= thresh).astype(int)
    metrics = calculate_metrics(y, pred, proba)
    metrics['f2'] = fbeta_score(y, pred, beta=2)
    metrics['threshold'] = thresh
    results.append(metrics)
    
    if abs(thresh - best_threshold) < 0.05:
        print(f'The best threshold = {thresh}')
    
    print(f"\nThreshold = {thresh}")
    print(f"  Precision: {metrics['precision']:.4f}")
    print(f"  Recall:    {metrics['recall']:.4f}")
    print(f"  F1:        {metrics['f1']:.4f}")
    print(f"  F2:        {metrics['f2']:.4f}")
    print(f"  PR-AUC:    {metrics['pr_auc']:.4f}")

# Summary table
print("SUMMARY:")
results_df = pd.DataFrame(results)
results_df = results_df[['threshold', 'precision', 'recall', 'f1', 'f2', 'pr_auc']]
print(results_df.to_string(index=False))


THRESHOLD COMPARISON

Threshold = 0.2: ← BEST (Optuna)
  Precision: 0.4598
  Recall:    0.8012
  F1:        0.5843
  F2:        0.6976
  PR-AUC:    0.5858

Threshold = 0.3:
  Precision: 0.5164
  Recall:    0.6815
  F1:        0.5876
  F2:        0.6405
  PR-AUC:    0.5858

Threshold = 0.4:
  Precision: 0.5636
  Recall:    0.5728
  F1:        0.5682
  F2:        0.5709
  PR-AUC:    0.5858

Threshold = 0.5:
  Precision: 0.6124
  Recall:    0.4387
  F1:        0.5112
  F2:        0.4651
  PR-AUC:    0.5858

Threshold = 0.6:
  Precision: 0.6675
  Recall:    0.2983
  F1:        0.4123
  F2:        0.3354
  PR-AUC:    0.5858

Threshold = 0.7:
  Precision: 0.7439
  Recall:    0.1396
  F1:        0.2350
  F2:        0.1666
  PR-AUC:    0.5858

SUMMARY TABLE
 threshold  precision   recall       f1       f2   pr_auc
       0.2   0.459790 0.801243 0.584288 0.697627 0.585776
       0.3   0.516439 0.681492 0.587595 0.640548 0.585776
       0.4   0.563611 0.572800 0.568168 0.570938 0.585776
       