# Home Credit Prediction Models

Finding best model to use for predicting if client will have payment difficulties or not.

1. Combining and Spliting Data
2. Models with No Feature Engineering,
3. Models with Feature Engineering,
4. Tuning Models with Feature Engineering

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, StratifiedKFold
import math
from sklearn.preprocessing import (
    LabelEncoder,
    FunctionTransformer,
    StandardScaler,
    OneHotEncoder,
)
from pandas.api.types import CategoricalDtype
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
)
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import optuna
from optuna import create_study
from optuna.pruners import MedianPruner
from sklearn.ensemble import VotingClassifier
import warnings
from sklearn.compose import ColumnTransformer
import time
import xgboost as xgb
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.base import BaseEstimator, TransformerMixin
from IPython.display import display
import json
from sklearn.metrics import (
    average_precision_score,
    roc_auc_score,
    f1_score,
    accuracy_score,
)
from functions import convert_days_to_years, convert_days_to_months, show_head_and_info
from model_functions import (
    get_generic_preprocessor,
    feature_engineering_custom,
    merge_by_sk_id_curr,
)
import psutil
import gc


warnings.simplefilter(action="ignore", category=FutureWarning)

## 1. Combining and Spliting Data

### About Datasets

In [2]:
application_data = pd.read_csv("application_train.csv")

In [3]:
model_df = merge_by_sk_id_curr(application_data)

In [4]:
show_head_and_info(model_df)

Data


Unnamed: 0,sk_id_curr,target,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,...,name_goods_category_other,name_payment_type_other,name_portfolio_other,name_product_type_other,name_seller_industry_other,name_type_suite_other,name_yield_group_other,product_combination_other,status_other,weekday_appr_process_start_other
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,Vehicles,XNA,POS,XNA,Auto technology,Unknown,low_normal,POS other with interest,0.0,SATURDAY
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,Consumer Electronics,Cash through the bank,POS,XNA,Consumer electronics,Family,middle,POS household with interest,,SATURDAY
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,Mobile,Cash through the bank,POS,XNA,Connectivity,Unaccompanied,middle,POS mobile without interest,,FRIDAY
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,,,,,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,XNA,Cash through the bank,Cash,x-sell,Consumer electronics,Family,high,Cash X-Sell: middle,,SUNDAY



Info
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 307511 entries, 0 to 307510
Columns: 219 entries, sk_id_curr to weekday_appr_process_start_other
dtypes: float64(140), int64(41), object(38)
memory usage: 513.8+ MB


### Splitting data

In [5]:
X = model_df.drop(columns=["target"])
y = model_df["target"]

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## 2. On Raw Data

### 2.1. Pipeline

In [6]:
preprocessor = get_generic_preprocessor(X_train, impute=True)
preprocessor_no_impute = get_generic_preprocessor(X_train, impute=False)

class_ratio = y_train.value_counts(normalize=True)
if len(class_ratio) > 1 and class_ratio[1] > 0:
    scale_pos_weight = class_ratio[0] / class_ratio[1]
else:
    scale_pos_weight = 1

models = {
    "Random Forest": Pipeline(
        [
            ("preprocessor", preprocessor),
            (
                "classifier",
                RandomForestClassifier(random_state=42, class_weight="balanced"),
            ),
        ]
    ),
    "XGBoost": Pipeline(
        [
            ("preprocessor", preprocessor_no_impute),
            (
                "classifier",
                XGBClassifier(
                    random_state=42,
                    scale_pos_weight=scale_pos_weight,
                    use_label_encoder=False,
                ),
            ),
        ]
    ),
    "Logistic Regression": Pipeline(
        [
            ("preprocessor", preprocessor),
            (
                "classifier",
                LogisticRegression(random_state=42, class_weight="balanced"),
            ),
        ]
    ),
    "KNN": Pipeline(
        [
            ("preprocessor", preprocessor),
            ("classifier", KNeighborsClassifier()),
        ]
    ),
    "LightGBM": Pipeline(
        [
            ("preprocessor", preprocessor_no_impute),
            (
                "classifier",
                LGBMClassifier(
                    random_state=42, scale_pos_weight=scale_pos_weight, verbose=0
                ),
            ),
        ]
    ),
    "CatBoost": Pipeline(
        [
            ("preprocessor", preprocessor_no_impute),
            (
                "classifier",
                CatBoostClassifier(
                    random_seed=42, auto_class_weights="Balanced", verbose=0
                ),
            ),
        ]
    ),
}

### 2.2. Training Models

In [7]:
model_results = {}
print("Starting model training...\n")

for model_name, model_pipeline in models.items():
    print(f"Training {model_name}...")
    start_time = time.time()

    model_pipeline.fit(X_train, y_train)
    y_pred = model_pipeline.predict(X_val)

    try:
        y_pred_proba = model_pipeline.predict_proba(X_val)[:, 1]
    except AttributeError:
        try:
            decision_scores = model_pipeline.decision_function(X_val)
            if decision_scores.ndim > 1 and decision_scores.shape[1] > 1:
                y_pred_proba = (
                    (decision_scores[:, 1] - decision_scores[:, 1].min())
                    / (decision_scores[:, 1].max() - decision_scores[:, 1].min())
                    if (decision_scores[:, 1].max() - decision_scores[:, 1].min()) != 0
                    else np.zeros_like(y_pred, dtype=float)
                )
            else:
                y_pred_proba = (
                    (decision_scores - decision_scores.min())
                    / (decision_scores.max() - decision_scores.min())
                    if (decision_scores.max() - decision_scores.min()) != 0
                    else np.zeros_like(y_pred, dtype=float)
                )
        except AttributeError:
            y_pred_proba = np.zeros_like(y_pred, dtype=float)
        except Exception:
            y_pred_proba = np.zeros_like(y_pred, dtype=float)

    pr_auc = average_precision_score(y_val, y_pred_proba)
    roc_auc = roc_auc_score(y_val, y_pred_proba)
    f1_score_1 = f1_score(y_val, y_pred, pos_label=1, zero_division=0)
    f1_score_0 = f1_score(y_val, y_pred, pos_label=0, zero_division=0)
    accuracy = accuracy_score(y_val, y_pred)

    model_results[model_name] = [pr_auc, roc_auc, f1_score_1, f1_score_0, accuracy]

    elapsed = time.time() - start_time
    print(f"{model_name} finished in {elapsed:.2f} seconds.\n")

df_results = pd.DataFrame(
    model_results,
    index=[
        "PR-AUC",
        "ROC-AUC",
        "F1-Score (Payment Difficulties)",
        "F1-Score (Other Cases)",
        "Accuracy",
    ],
).T

try:
    display(df_results.style.background_gradient(cmap="Blues"))
except ImportError:
    print(df_results)

Starting model training...

Training Random Forest...
Random Forest finished in 181.05 seconds.

Training XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost finished in 16.66 seconds.

Training Logistic Regression...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression finished in 17.88 seconds.

Training KNN...
KNN finished in 162.58 seconds.

Training LightGBM...




LightGBM finished in 11.94 seconds.

Training CatBoost...
CatBoost finished in 81.97 seconds.



Unnamed: 0,PR-AUC,ROC-AUC,F1-Score (Payment Difficulties),F1-Score (Other Cases),Accuracy
Random Forest,0.202136,0.723952,0.00201,0.957935,0.919272
XGBoost,0.241066,0.754734,0.28195,0.84809,0.749232
Logistic Regression,0.240951,0.761431,0.268754,0.80951,0.697755
KNN,0.104049,0.583916,0.042109,0.954678,0.913451
LightGBM,0.262668,0.769708,0.276782,0.819234,0.710762
CatBoost,0.260477,0.765673,0.292237,0.858083,0.763573


On the raw dataset, with only basic missing value handling and encoding:
* CatBoost has second highest PR-AUC (26.05%) and highest F1-Score for payment difficulties group (29.22%).
* LightGBM has highest PR-AUC (26.27%), but has lower F1-score (27.68%) for payment difficulties group.
* XGBoost even if it doesn't have hight PR-AUC (24.11%), but it has high F1-score (28.20%) for payment difficulties group
* Logistic Regression is average in all metrix.
* KNN and Random Forest have good accuracy and F1-score other cases, but they work badly with payment difficulties group.

## 3. Feature Engineering

### 3.1. Pipeline

In [8]:
class FeatureEngineeringTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, func):
        self.func = func

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return self.func(X)


feature_engineering = FeatureEngineeringTransformer(feature_engineering_custom)
X_train_fe = feature_engineering_custom(X_train)

preprocessor = get_generic_preprocessor(X_train_fe, impute=True)
preprocessor_no_impute = get_generic_preprocessor(X_train_fe, impute=False)


models = {
    "Random Forest": Pipeline(
        [
            ("feature_engineering", feature_engineering),
            ("preprocessor", preprocessor),
            (
                "classifier",
                RandomForestClassifier(random_state=42, class_weight="balanced"),
            ),
        ]
    ),
    "XGBoost": Pipeline(
        [
            ("feature_engineering", feature_engineering),
            ("preprocessor", preprocessor_no_impute),
            (
                "classifier",
                XGBClassifier(
                    random_state=42,
                    scale_pos_weight=scale_pos_weight,
                    use_label_encoder=False,
                ),
            ),
        ]
    ),
    "Logistic Regression": Pipeline(
        [
            ("feature_engineering", feature_engineering),
            ("preprocessor", preprocessor),
            (
                "classifier",
                LogisticRegression(
                    random_state=42, class_weight="balanced", max_iter=1000
                ),
            ),
        ]
    ),
    "KNN": Pipeline(
        [
            ("feature_engineering", feature_engineering),
            ("preprocessor", preprocessor),
            ("classifier", KNeighborsClassifier()),
        ]
    ),
    "LightGBM": Pipeline(
        [
            ("feature_engineering", feature_engineering),
            ("preprocessor", preprocessor_no_impute),
            (
                "classifier",
                LGBMClassifier(
                    random_state=42, scale_pos_weight=scale_pos_weight, verbose=0
                ),
            ),
        ]
    ),
    "CatBoost": Pipeline(
        [
            ("feature_engineering", feature_engineering),
            ("preprocessor", preprocessor_no_impute),
            (
                "classifier",
                CatBoostClassifier(
                    random_seed=42, auto_class_weights="Balanced", verbose=0
                ),
            ),
        ]
    ),
}

### 3.2. Training Model

In [9]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
model_results = {}

print("Starting model training...\n")

for model_name, model_pipeline in models.items():
    print(f"Evaluating {model_name}...")
    start_time = time.time()

    pr_aucs, roc_aucs = [], []
    f1_1s, f1_0s = [], []
    accuracies = []

    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_va = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_va = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model_pipeline.fit(X_tr, y_tr)
        y_pred = model_pipeline.predict(X_va)

        try:
            y_pred_proba = model_pipeline.predict_proba(X_va)[:, 1]
        except AttributeError:
            try:
                decision_scores = model_pipeline.decision_function(X_va)
                if decision_scores.ndim > 1 and decision_scores.shape[1] > 1:
                    y_pred_proba = (
                        (decision_scores[:, 1] - decision_scores[:, 1].min())
                        / (decision_scores[:, 1].max() - decision_scores[:, 1].min())
                        if (decision_scores[:, 1].max() - decision_scores[:, 1].min())
                        != 0
                        else np.zeros_like(y_pred, dtype=float)
                    )
                else:
                    y_pred_proba = (
                        (decision_scores - decision_scores.min())
                        / (decision_scores.max() - decision_scores.min())
                        if (decision_scores.max() - decision_scores.min()) != 0
                        else np.zeros_like(y_pred, dtype=float)
                    )
            except Exception:
                y_pred_proba = np.zeros_like(y_pred, dtype=float)

        pr_aucs.append(average_precision_score(y_va, y_pred_proba))
        roc_aucs.append(roc_auc_score(y_va, y_pred_proba))
        f1_1s.append(f1_score(y_va, y_pred, pos_label=1, zero_division=0))
        f1_0s.append(f1_score(y_va, y_pred, pos_label=0, zero_division=0))
        accuracies.append(accuracy_score(y_va, y_pred))

    model_results[model_name] = [
        np.mean(pr_aucs),
        np.mean(roc_aucs),
        np.mean(f1_1s),
        np.mean(f1_0s),
        np.mean(accuracies),
    ]

    elapsed = time.time() - start_time
    print(f"{model_name} finished in {elapsed:.2f} seconds.\n")

df_results = pd.DataFrame(
    model_results,
    index=[
        "PR-AUC",
        "ROC-AUC",
        "F1-Score (Payment Difficulties)",
        "F1-Score (Other Cases)",
        "Accuracy",
    ],
).T

try:
    display(df_results.style.background_gradient(cmap="Blues"))
except ImportError:
    print(df_results)

Starting model training...

Evaluating Random Forest...
Random Forest finished in 730.90 seconds.

Evaluating XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost finished in 85.21 seconds.

Evaluating Logistic Regression...
Logistic Regression finished in 190.23 seconds.

Evaluating KNN...
KNN finished in 584.56 seconds.

Evaluating LightGBM...




LightGBM finished in 55.32 seconds.

Evaluating CatBoost...
CatBoost finished in 416.36 seconds.



Unnamed: 0,PR-AUC,ROC-AUC,F1-Score (Payment Difficulties),F1-Score (Other Cases),Accuracy
Random Forest,0.197178,0.72328,0.004308,0.957903,0.919222
XGBoost,0.230301,0.747671,0.279228,0.853182,0.756061
Logistic Regression,0.237508,0.760178,0.270091,0.811622,0.700534
KNN,0.103448,0.578782,0.041228,0.955076,0.914174
LightGBM,0.249079,0.765628,0.275926,0.820076,0.711774
CatBoost,0.247909,0.760456,0.291376,0.861275,0.767975


After applying feature engineering:
* LightGBM has highest PR-AUC (24.91%) but a bit lower F1-score for payment difficulties (27.59%).
* CatBoost has slightly lower PR-AUC (24.79%) and highest F1-score for payment difficulties (29.14%).
* XGBoost has second highest F1-score for payment difficulties (27.92%), but have low PR-AUC (23.03%).
* Logistic Regression is average as it was before.
* KNN and Random Forest were preforming good with accuracy and F1-score for other cases group.

Since KNN and Random Forest perform poorly overall, and Logistic Regression consistently underperforms more advanced models across all key metrics, we exclude them from further tuning.

## 4. Tuning Models with Feature Engineering

In [10]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_model_results = {}


def objective_xgb(trial):
    params = {
        "max_depth": trial.suggest_int("max_depth", 4, 8),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "n_estimators": trial.suggest_int("n_estimators", 100, 300),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "scale_pos_weight": scale_pos_weight,
        "subsample": trial.suggest_float("subsample", 0.7, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
        "objective": "binary:logistic",
        "random_state": 42,
        "verbosity": 0,
        "use_label_encoder": False,
        "max_delta_step": trial.suggest_int("max_delta_step", 1, 10),
    }
    scores = []
    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_va = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_va = y_train.iloc[train_idx], y_train.iloc[val_idx]
        model = XGBClassifier(**params)
        pipeline = Pipeline(
            [
                ("feature_engineering", feature_engineering),
                ("preprocessor", preprocessor_no_impute),
                ("classifier", model),
            ]
        )
        pipeline.fit(X_tr, y_tr)
        preds = pipeline.predict_proba(X_va)[:, 1]
        scores.append(average_precision_score(y_va, preds))
    return np.mean(scores)


def objective_lgb(trial):
    params = {
        "max_depth": trial.suggest_int("max_depth", 4, 8),
        "n_estimators": trial.suggest_int("n_estimators", 100, 300),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "scale_pos_weight": scale_pos_weight,
        "num_leaves": trial.suggest_int("num_leaves", 31, 127),
        "min_child_samples": trial.suggest_int("min_child_samples", 10, 50),
        "subsample": trial.suggest_float("subsample", 0.7, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
        "objective": "binary",
        "random_state": 42,
        "verbosity": -1,
        "verbose": -1,
    }

    scores = []

    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_va = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_va = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model = LGBMClassifier(**params)

        pipeline = Pipeline(
            [
                ("feature_engineering", feature_engineering),
                ("preprocessor", preprocessor_no_impute),
                ("classifier", model),
            ]
        )

        pipeline.fit(X_tr, y_tr)

        preds = pipeline.predict_proba(X_va)[:, 1]
        score = average_precision_score(y_va, preds)
        scores.append(score)

    return np.mean(scores)


def objective_catboost(trial):
    params = {
        "depth": trial.suggest_int("depth", 4, 8),
        "iterations": trial.suggest_int("iterations", 100, 300),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1.0, 5.0),
        "border_count": trial.suggest_int("border_count", 32, 128),
        "verbose": 0,
        "auto_class_weights": "Balanced",
        "random_seed": 42,
        "task_type": "CPU",
    }
    scores = []
    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_va = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_va = y_train.iloc[train_idx], y_train.iloc[val_idx]
        model = CatBoostClassifier(**params)
        pipeline = Pipeline(
            [
                ("feature_engineering", feature_engineering),
                ("preprocessor", preprocessor_no_impute),
                ("classifier", model),
            ]
        )
        pipeline.fit(X_tr, y_tr)
        preds = pipeline.predict_proba(X_va)[:, 1]
        scores.append(average_precision_score(y_va, preds))
    return np.mean(scores)

In [11]:
class EarlyStoppingCallback:
    def __init__(self, patience: int = 10):
        self.patience = patience
        self.best_value = None
        self.no_improvement_count = 0

    def __call__(self, study, trial):
        if self.best_value is None or study.best_value > self.best_value:
            self.best_value = study.best_value
            self.no_improvement_count = 0
        else:
            self.no_improvement_count += 1
        if self.no_improvement_count >= self.patience:
            print(f"Early stopping: No improvement in {self.patience} trials.")
            study.stop()


objective_funcs = {
    "XGBoost": objective_xgb,
    "LightGBM": objective_lgb,
    "CatBoost": objective_catboost,
}

for model_name in objective_funcs.keys():
    print(f"Processing {model_name}...")
    start_time = time.time()

    early_stop = EarlyStoppingCallback(patience=10)
    study = optuna.create_study(direction="maximize", study_name=model_name)
    study.optimize(objective_funcs[model_name], n_trials=50, callbacks=[early_stop])
    best_params = study.best_params

    if model_name == "XGBoost":
        best_clf = XGBClassifier(
            **best_params,
            use_label_encoder=False,
            random_state=42,
            verbosity=0,
            scale_pos_weight=scale_pos_weight,
        )
    elif model_name == "LightGBM":
        best_clf = LGBMClassifier(
            **best_params,
            random_state=42,
            verbosity=-1,
            scale_pos_weight=scale_pos_weight,
        )
    else:
        best_clf = CatBoostClassifier(
            **best_params,
            verbose=0,
            random_seed=42,
            auto_class_weights="Balanced",
        )

    pr_aucs, roc_aucs, f1_1s, f1_0s, accuracies = [], [], [], [], []

    for train_idx, val_idx in cv.split(X_train, y_train):
        X_tr, X_va = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_va = y_train.iloc[train_idx], y_train.iloc[val_idx]

        pipeline = Pipeline(
            [
                ("feature_engineering", feature_engineering),
                ("preprocessor", preprocessor_no_impute),
                ("classifier", best_clf),
            ]
        )

        pipeline.fit(X_tr, y_tr)
        y_pred = pipeline.predict(X_va)

        try:
            y_pred_proba = pipeline.predict_proba(X_va)[:, 1]
        except AttributeError:
            try:
                decision_scores = pipeline.decision_function(X_va)
                denom = decision_scores.max() - decision_scores.min()
                y_pred_proba = (
                    ((decision_scores - decision_scores.min()) / denom)
                    if denom != 0
                    else np.zeros_like(y_pred, dtype=float)
                )
            except:
                y_pred_proba = np.zeros_like(y_pred, dtype=float)

        pr_aucs.append(average_precision_score(y_va, y_pred_proba))
        roc_aucs.append(roc_auc_score(y_va, y_pred_proba))
        f1_1s.append(f1_score(y_va, y_pred, pos_label=1, zero_division=0))
        f1_0s.append(f1_score(y_va, y_pred, pos_label=0, zero_division=0))
        accuracies.append(accuracy_score(y_va, y_pred))

    grid_model_results[model_name] = {
        "Best Parameters": json.dumps(best_params, indent=2),
        "Best Optuna PR-AUC": study.best_value,
        "Mean PR-AUC": np.mean(pr_aucs),
        "Mean ROC-AUC": np.mean(roc_aucs),
        "F1 Score (Payment Difficulties)": np.mean(f1_1s),
        "F1 Score (Other Cases)": np.mean(f1_0s),
        "Accuracy": np.mean(accuracies),
    }

    elapsed = time.time() - start_time
    print(f"{model_name} finished in {elapsed:.2f} seconds.\n")
    print("Best Parameters:", best_params)
    print("Best Average Precision Score (Optuna):", study.best_value)
    print()

    del study, best_clf
    gc.collect()

df_results = pd.DataFrame(grid_model_results).T.rename(
    columns={
        "Best Parameters": "Params",
        "Best Optuna PR-AUC": "PR-AUC",
        "Mean PR-AUC": "PR-AUC (CV)",
        "Mean ROC-AUC": "ROC-AUC",
        "F1 Score (Payment Difficulties)": "F1-Score (Payment Difficulties)",
        "F1 Score (Other Cases)": "F1-Score (Other Cases)",
        "Accuracy": "Accuracy",
    }
)

try:
    display(df_results.style.background_gradient(cmap="Blues"))
except ImportError:
    print(df_results)

[I 2025-06-10 18:39:15,449] A new study created in memory with name: XGBoost


Processing XGBoost...


[I 2025-06-10 18:40:49,876] Trial 0 finished with value: 0.2476445867439113 and parameters: {'max_depth': 6, 'min_child_weight': 6, 'n_estimators': 138, 'learning_rate': 0.0607498486754422, 'subsample': 0.7527716944671111, 'colsample_bytree': 0.9942358271807217, 'max_delta_step': 2}. Best is trial 0 with value: 0.2476445867439113.
[I 2025-06-10 18:42:23,145] Trial 1 finished with value: 0.24513600542459651 and parameters: {'max_depth': 7, 'min_child_weight': 3, 'n_estimators': 122, 'learning_rate': 0.07446610669693111, 'subsample': 0.7222485865230469, 'colsample_bytree': 0.7364086001941689, 'max_delta_step': 5}. Best is trial 0 with value: 0.2476445867439113.
[I 2025-06-10 18:44:01,905] Trial 2 finished with value: 0.24977466328786316 and parameters: {'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 190, 'learning_rate': 0.05433413346104099, 'subsample': 0.7035796161205256, 'colsample_bytree': 0.9933076172423544, 'max_delta_step': 1}. Best is trial 2 with value: 0.249774663287863

Early stopping: No improvement in 10 trials.


[I 2025-06-10 19:43:47,087] A new study created in memory with name: LightGBM


XGBoost finished in 3871.48 seconds.

Best Parameters: {'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 298, 'learning_rate': 0.05113596608549376, 'subsample': 0.7482944515367064, 'colsample_bytree': 0.8021429739691673, 'max_delta_step': 10}
Best Average Precision Score (Optuna): 0.25248494337515437

Processing LightGBM...


[I 2025-06-10 19:44:43,431] Trial 0 finished with value: 0.22249776645800728 and parameters: {'max_depth': 7, 'n_estimators': 124, 'learning_rate': 0.013072267682324395, 'num_leaves': 43, 'min_child_samples': 47, 'subsample': 0.7813628108607398, 'colsample_bytree': 0.8303136462424089}. Best is trial 0 with value: 0.22249776645800728.
[I 2025-06-10 19:45:29,812] Trial 1 finished with value: 0.24708527159676477 and parameters: {'max_depth': 4, 'n_estimators': 149, 'learning_rate': 0.06036795251448346, 'num_leaves': 79, 'min_child_samples': 32, 'subsample': 0.7095200168074494, 'colsample_bytree': 0.7583972913629193}. Best is trial 1 with value: 0.24708527159676477.
[I 2025-06-10 19:46:23,937] Trial 2 finished with value: 0.2487148130341542 and parameters: {'max_depth': 5, 'n_estimators': 201, 'learning_rate': 0.061037196628398974, 'num_leaves': 120, 'min_child_samples': 10, 'subsample': 0.7546092611624005, 'colsample_bytree': 0.8904115901964113}. Best is trial 2 with value: 0.248714813034

Early stopping: No improvement in 10 trials.


[I 2025-06-10 20:01:41,939] A new study created in memory with name: CatBoost


LightGBM finished in 1074.66 seconds.

Best Parameters: {'max_depth': 6, 'n_estimators': 146, 'learning_rate': 0.06854658462312682, 'num_leaves': 36, 'min_child_samples': 49, 'subsample': 0.9551128155998938, 'colsample_bytree': 0.8916958625383079}
Best Average Precision Score (Optuna): 0.2502215587308644

Processing CatBoost...


[I 2025-06-10 20:04:33,443] Trial 0 finished with value: 0.24045864012948814 and parameters: {'depth': 6, 'iterations': 120, 'learning_rate': 0.09047555267706704, 'l2_leaf_reg': 3.455872424639031, 'border_count': 58}. Best is trial 0 with value: 0.24045864012948814.
[I 2025-06-10 20:07:23,166] Trial 1 finished with value: 0.22863329568438334 and parameters: {'depth': 5, 'iterations': 122, 'learning_rate': 0.03757553545539814, 'l2_leaf_reg': 3.6393102262301174, 'border_count': 120}. Best is trial 0 with value: 0.24045864012948814.
[I 2025-06-10 20:10:57,039] Trial 2 finished with value: 0.2294881593345937 and parameters: {'depth': 5, 'iterations': 294, 'learning_rate': 0.017303083147985465, 'l2_leaf_reg': 1.5313370020107224, 'border_count': 125}. Best is trial 0 with value: 0.24045864012948814.
[I 2025-06-10 20:14:08,848] Trial 3 finished with value: 0.21256932579551954 and parameters: {'depth': 6, 'iterations': 152, 'learning_rate': 0.010787294966406421, 'l2_leaf_reg': 2.57002066655717

Early stopping: No improvement in 10 trials.
CatBoost finished in 9373.40 seconds.

Best Parameters: {'depth': 5, 'iterations': 283, 'learning_rate': 0.09859326359218393, 'l2_leaf_reg': 3.6492842109610466, 'border_count': 74}
Best Average Precision Score (Optuna): 0.2512377654895433



Unnamed: 0,Params,PR-AUC,PR-AUC (CV),ROC-AUC,F1-Score (Payment Difficulties),F1-Score (Other Cases),Accuracy
XGBoost,"{  ""max_depth"": 5,  ""min_child_weight"": 10,  ""n_estimators"": 298,  ""learning_rate"": 0.05113596608549376,  ""subsample"": 0.7482944515367064,  ""colsample_bytree"": 0.8021429739691673,  ""max_delta_step"": 10 }",0.252485,0.252485,0.766899,0.27995,0.829713,0.724566
LightGBM,"{  ""max_depth"": 6,  ""n_estimators"": 146,  ""learning_rate"": 0.06854658462312682,  ""num_leaves"": 36,  ""min_child_samples"": 49,  ""subsample"": 0.9551128155998938,  ""colsample_bytree"": 0.8916958625383079 }",0.250222,0.250222,0.7656,0.277851,0.824768,0.717973
CatBoost,"{  ""depth"": 5,  ""iterations"": 283,  ""learning_rate"": 0.09859326359218393,  ""l2_leaf_reg"": 3.6492842109610466,  ""border_count"": 74 }",0.251238,0.251238,0.766472,0.277041,0.820969,0.713009


After tuning we see that:
* PR-AUC (CV) is similar to all models, with highest being XGBoost (25.25%), but all PR-AUC are around 25%.