## BankTermPredict: Predicting Client Subscription to Term Deposits from Campaign Data

Modeling  
Source:  https://archive.ics.uci.edu/dataset/222/bank+marketing  
Greg Gibson Sept. 2025

This notebook:
- Loads `bank-full.csv`
- Splits features/target with stratification (80/20)
- Builds three paths:
  - Class-weighted models within sklearn Pipelines using `pipeline.build_preprocessor()`
  - SMOTE on preprocessed dense arrays, then train models
  - LightGBM and CatBoost keep raw categories, skipping one hot encoding
- Compares metrics with emphasis on recall


In [None]:
import os
import time
import joblib
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import uniform, loguniform 
from imblearn.over_sampling import SMOTE

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV, GridSearchCV, cross_validate
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score, f1_score, make_scorer
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Custom preprocessors
from pipeline.preprocessor import build_preprocessor
#from pipeline.preprocessor_smote import smote_preprocessor
from pipeline.preprocessor_noOHE import noOHE_preprocessor


DATA_PATH = os.path.join("..", "data", "bank-full.csv")

df = pd.read_csv(DATA_PATH, sep=";")
print(df.shape)
df.head()

(45211, 17)


Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [70]:
# Separate features and target
df["y"] = (df["y"] == "yes").astype(int)
X = df.drop(columns=["y"])
y = df["y"]

# Train/test split with stratification
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

X_train.shape, X_test.shape, y_train.mean(), y_test.mean()

((36168, 16),
 (9043, 16),
 np.float64(0.11698186241981863),
 np.float64(0.11699657193409267))

Tracking

In [71]:
mlflow.set_tracking_uri("http://127.0.0.1:8080")
mlflow.set_experiment("Bank Term Deposit Prediction")

<Experiment: artifact_location='mlflow-artifacts:/473699037310066725', creation_time=1759356914576, experiment_id='473699037310066725', last_update_time=1759356914576, lifecycle_stage='active', name='Bank Term Deposit Prediction', tags={}>

Experiments

In [72]:
# Define category features for LightGBM and CatBoost
cat_features = ["job", "marital", "contact", "month", "poutcome", "age_group"]

In [73]:
# Define experiments: (preprocessor_name, preprocessor_instance_or_builder, model_instance)
experiments = [
    # Logistic Regression
    ("balanced_preproc", build_preprocessor, LogisticRegression(class_weight="balanced", max_iter=500)),
    ("smote_preproc", build_preprocessor, LogisticRegression(max_iter=500)),

    # Random Forest
    ("balanced_preproc", build_preprocessor, RandomForestClassifier(n_estimators=400, class_weight="balanced")),
    ("smote_preproc", build_preprocessor, RandomForestClassifier(n_estimators=400)),

    # XGBoost
    ("balanced_preproc", build_preprocessor, XGBClassifier(n_estimators=400, max_depth=5, learning_rate=0.08,
                                                           eval_metric="logloss", use_label_encoder=False,
                                                           scale_pos_weight=5)),
    ("smote_preproc", build_preprocessor, XGBClassifier(n_estimators=400, max_depth=5, learning_rate=0.08,
                                                                  eval_metric="logloss", use_label_encoder=False)),

    
    # Adds No One Hot Encoding (OHE) or scaling - for LightGBM and CatBoost category handling

    # LightGBM
    ("balanced_preproc", build_preprocessor, LGBMClassifier(max_depth=-1, learning_rate=0.05, class_weight="balanced")),
    ("smote_preproc", build_preprocessor, LGBMClassifier(max_depth=-1, learning_rate=0.05)),
    ("noOHE_preproc", noOHE_preprocessor, LGBMClassifier(categorical_feature=cat_features, max_depth=-1, learning_rate=0.05, class_weight="balanced")),

    # CatBoost
    ("balanced_preproc", build_preprocessor, CatBoostClassifier(depth=6, learning_rate=0.05,
                                                                   auto_class_weights="Balanced", verbose=0)),
    ("smote_preproc", build_preprocessor, CatBoostClassifier(depth=6, learning_rate=0.05, verbose=0)),
    ("noOHE_preproc", noOHE_preprocessor, CatBoostClassifier(cat_features=cat_features, depth=6, learning_rate=0.05,
                                                                   auto_class_weights="Balanced", verbose=0)),
]


In [74]:
results = []

for preproc_name, preproc, model in experiments:
    with mlflow.start_run(run_name=f"{preproc_name}_{model.__class__.__name__}"):
        # instantiate preprocessor if a builder function was passed
        if callable(preproc):
            preproc_inst = preproc()
        else:
            preproc_inst = preproc

        # --- Preprocess data ---
        X_train_prep = preproc_inst.fit_transform(X_train, y_train)
        X_test_prep = preproc_inst.transform(X_test)

        # --- Determine imbalance strategy ---
        if preproc_name.startswith("smote"):
            sm = SMOTE(random_state=42)
            X_train_prep, y_train_bal = sm.fit_resample(X_train_prep, y_train)
            imbalance_strategy = "SMOTE"
        else:
            y_train_bal = y_train
            imbalance_strategy = "class_weight"

        # --- Determine encoding strategy ---
        if preproc_name.startswith("noOHE"):
            encoding = "noOneHot"
        else:
            encoding = "OneHot"

        # --- Train ---
        model.fit(X_train_prep, y_train_bal)
        y_pred = model.predict(X_test_prep)

        # --- Metrics ---
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        accuracy = accuracy_score(y_test, y_pred)

        # --- Log params, metrics, and tags ---
        mlflow.log_param("preprocessor", preproc_name)
        mlflow.log_param("model", model.__class__.__name__)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)
        mlflow.log_metric("accuracy", accuracy)

        # Tags (better filtering in MLflow UI)
        mlflow.set_tag("imbalance_handling", imbalance_strategy)
        mlflow.set_tag("encoding", encoding)
        mlflow.set_tag("stage", "baseline_comparison")

        results.append((preproc_name, model.__class__.__name__, recall, f1, accuracy))

# Summary
results_df = pd.DataFrame(results, columns=["Preprocessor", "Model", "Recall", "F1", "Accuracy"])
print(results_df.sort_values("Recall", ascending=False))



üèÉ View run balanced_preproc_LogisticRegression at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/cdf5bd2ec1184c719eda0005b317cc5f
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725




üèÉ View run smote_preproc_LogisticRegression at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/b16b12c0f0bd4cd6938bc7dd7759067d
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725




üèÉ View run balanced_preproc_RandomForestClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/cb8599991e6947c9afed813cf61741a0
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725




üèÉ View run smote_preproc_RandomForestClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/4d799b79a61f4658b3181b6f03f7a735
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725
üèÉ View run balanced_preproc_XGBClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/ddd7a1ba29ac409cb223b1007cf4eb5c
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725




üèÉ View run smote_preproc_XGBClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/c358e8d0f0af41d1b36d4d39e878c0cf
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725
[LightGBM] [Info] Number of positive: 4231, number of negative: 31937
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002949 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1134
[LightGBM] [Info] Number of data points in the train set: 36168, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
üèÉ View run balanced_preproc_LGBMClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/e8c30aabd12e4d4cbda2587a11ee3821
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725




[LightGBM] [Info] Number of positive: 31937, number of negative: 31937
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.017999 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12708
[LightGBM] [Info] Number of data points in the train set: 63874, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
üèÉ View run smote_preproc_LGBMClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/5053e7329dd0402497c0ce31001ea6c2
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725
[LightGBM] [Info] Number of positive: 4231, number of negative: 31937
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001447 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `fo



üèÉ View run smote_preproc_CatBoostClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/d5f13302a5e34c7483c3beab0144a10f
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725
üèÉ View run noOHE_preproc_CatBoostClassifier at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/55c0b939d95e45548b0fdb47865b770e
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725
        Preprocessor                   Model    Recall        F1  Accuracy
6   balanced_preproc          LGBMClassifier  0.634216  0.451244  0.819529
0   balanced_preproc      LogisticRegression  0.630435  0.384217  0.763574
1      smote_preproc      LogisticRegression  0.620038  0.380510  0.763795
9   balanced_preproc      CatBoostClassifier  0.616257  0.463068  0.832799
8      noOHE_preproc          LGBMClassifier  0.600189  0.423616  0.808913
11     noOHE_preproc      CatBoostClassifier  0.580340  0.428173  0.818644
4   balanced_preproc       

Tuning

In [75]:
models = {
    "logreg": Pipeline([
        ("preprocessor", build_preprocessor()),
        ("clf", LogisticRegression())
    ]),
    "lgbm": Pipeline([
        ("preprocessor", build_preprocessor()),
        ("clf", LGBMClassifier(random_state=42))
    ]),
    "lgbm_noOHE": Pipeline([
        ("preprocessor", noOHE_preprocessor()),
        ("clf", LGBMClassifier(categorical_feature=cat_features, random_state=42))
    ]),
    "catboost": Pipeline([
        ("preprocessor", build_preprocessor()),
        ("clf", CatBoostClassifier(verbose=0, random_state=42))
    ]),
    "catboost_noOHE": Pipeline([
        ("preprocessor", noOHE_preprocessor()),
        ("clf", CatBoostClassifier(cat_features=cat_features, verbose=0, random_state=42))
    ])
}

In [76]:
# Hyperparameter tuning for top models
param_grids = {
    "logreg": {
        "clf__C": loguniform(1e-3, 10),
        "clf__penalty": ["l1", "l2"],
        "clf__solver": ["liblinear", "saga"],
        "clf__max_iter": [200, 500],
        "clf__class_weight": ["balanced"],
    },
    "lgbm": {
        "clf__num_leaves": np.arange(20, 200, 20),
        "clf__max_depth": [-1, 3, 4, 6],
        "clf__learning_rate": [0.01, 0.05, 0.1],
        "clf__n_estimators": [300, 500],
        "clf__subsample": uniform(0.7, 0.3),
        "clf__colsample_bytree": uniform(0.7, 0.3),
        "clf__scale_pos_weight": [1, 2, 5, 10],
    },
    "lgbm_noOHE": {
        "clf__num_leaves": np.arange(20, 200, 20),
        "clf__max_depth": [-1, 3, 4, 6],
        "clf__learning_rate": [0.01, 0.05, 0.1],
        "clf__n_estimators": [300, 500],
        "clf__subsample": uniform(0.7, 0.3),
        "clf__colsample_bytree": uniform(0.7, 0.3),
        "clf__scale_pos_weight": [1, 2, 5, 10],
     },
    "catboost": {
        "clf__depth": [4, 6, 8],
        "clf__learning_rate": [0.01, 0.05, 0.1],
        "clf__iterations": [300, 500],
        "clf__l2_leaf_reg": [1, 3, 5],
        "clf__class_weights": [[1, 5], [1, 10]],
    },
    "catboost_noOHE": {
        "clf__depth": [4, 6, 8],
        "clf__learning_rate": [0.01, 0.05, 0.1],
        "clf__iterations": [300, 500],
        "clf__l2_leaf_reg": [1, 3, 5],
        "clf__class_weights": [[1, 5], [1, 10]],
    }
}

In [77]:
# Cross-validation setup
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scorer = make_scorer(recall_score)

In [78]:
# Training + MLflow logging loop
for name, model in models.items():
    print(f"\nüîé Tuning {name}...")

    search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grids[name],
        n_iter=15,
        scoring=scorer,
        cv=cv,
        n_jobs=-1,
        random_state=42,
        verbose=1
    )

    with mlflow.start_run(run_name=f"{name}_tuned"):
        mlflow.sklearn.autolog(log_models=False)  # we'll log manually

        # Fit search
        search.fit(X_train, y_train)
        best_model = search.best_estimator_   # full pipeline

        # Predictions
        y_pred = best_model.predict(X_test)

        # Metrics
        recall = recall_score(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)

        # Log recall + params
        mlflow.log_metric("test_recall", recall)
        mlflow.log_params(search.best_params_)

        # Confusion matrix plot
        plt.figure(figsize=(5,4))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.title(f"Confusion Matrix - {name}")
        plt.tight_layout()
        plt.savefig(f"cm_{name}.png", dpi=300, bbox_inches="tight")
        mlflow.log_artifact(f"cm_{name}.png")
        plt.close()

        # ‚úÖ Log the entire pipeline
        mlflow.sklearn.log_model(best_model, artifact_path="model")

        print(f"‚úÖ {name} best CV recall: {search.best_score_:.4f}")
        print(f"‚úÖ {name} test recall: {recall:.4f}")
        print(f"Best params: {search.best_params_}")


üîé Tuning logreg...
Fitting 5 folds for each of 15 candidates, totalling 75 fits


2025/10/03 23:57:22 INFO mlflow.sklearn.utils: Logging the 5 best runs, 10 runs will be omitted.


‚úÖ logreg best CV recall: 0.6771
‚úÖ logreg test recall: 0.6938
Best params: {'clf__C': np.float64(7.8527554947242555), 'clf__class_weight': 'balanced', 'clf__max_iter': 200, 'clf__penalty': 'l2', 'clf__solver': 'saga'}
üèÉ View run logreg_tuned at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/5b76dada01244d179b6edf4e865567fe
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725

üîé Tuning lgbm...
Fitting 5 folds for each of 15 candidates, totalling 75 fits
[LightGBM] [Info] Number of positive: 4231, number of negative: 31937
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002667 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1134
[LightGBM] [Info] Number of data points in the train set: 36168, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.116982 -> initscore=

2025/10/03 23:59:43 INFO mlflow.sklearn.utils: Logging the 5 best runs, 10 runs will be omitted.


‚úÖ lgbm best CV recall: 0.7102
‚úÖ lgbm test recall: 0.7259
Best params: {'clf__colsample_bytree': np.float64(0.9313811040057837), 'clf__learning_rate': 0.01, 'clf__max_depth': 3, 'clf__n_estimators': 300, 'clf__num_leaves': np.int64(180), 'clf__scale_pos_weight': 10, 'clf__subsample': np.float64(0.958931027762678)}
üèÉ View run lgbm_tuned at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/ad1c9dd2154b4dbfadd9c9e59847abaf
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725

üîé Tuning lgbm_noOHE...
Fitting 5 folds for each of 15 candidates, totalling 75 fits
[LightGBM] [Info] Number of positive: 4231, number of negative: 31937
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001777 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 45
[LightGBM] [Info] Number of data points in the train set: 36168

2025/10/04 00:01:45 INFO mlflow.sklearn.utils: Logging the 5 best runs, 10 runs will be omitted.


‚úÖ lgbm_noOHE best CV recall: 0.6597
‚úÖ lgbm_noOHE test recall: 0.6805
Best params: {'clf__colsample_bytree': np.float64(0.9316734307889971), 'clf__learning_rate': 0.1, 'clf__max_depth': 4, 'clf__n_estimators': 300, 'clf__num_leaves': np.int64(20), 'clf__scale_pos_weight': 10, 'clf__subsample': np.float64(0.9187021504122961)}
üèÉ View run lgbm_noOHE_tuned at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/dd237972d06e4730913a56e96f3af063
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725

üîé Tuning catboost...
Fitting 5 folds for each of 15 candidates, totalling 75 fits


2025/10/04 00:06:57 INFO mlflow.sklearn.utils: Logging the 5 best runs, 10 runs will be omitted.


‚úÖ catboost best CV recall: 0.7315
‚úÖ catboost test recall: 0.7439
Best params: {'clf__learning_rate': 0.01, 'clf__l2_leaf_reg': 1, 'clf__iterations': 500, 'clf__depth': 6, 'clf__class_weights': [1, 10]}
üèÉ View run catboost_tuned at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/73b11716238e49238accbecdebaf9c8a
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725

üîé Tuning catboost_noOHE...
Fitting 5 folds for each of 15 candidates, totalling 75 fits


2025/10/04 00:26:01 INFO mlflow.sklearn.utils: Logging the 5 best runs, 10 runs will be omitted.


‚úÖ catboost_noOHE best CV recall: 0.6932
‚úÖ catboost_noOHE test recall: 0.7032
Best params: {'clf__learning_rate': 0.01, 'clf__l2_leaf_reg': 1, 'clf__iterations': 500, 'clf__depth': 6, 'clf__class_weights': [1, 10]}
üèÉ View run catboost_noOHE_tuned at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/f57e8c2d8dce4a7caa87f12b82724041
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725


Expanded tuning for best model

In [None]:
best_pipe = Pipeline([
    ("preprocessor", build_preprocessor()),
    ("clf", CatBoostClassifier(verbose=0, random_state=42))
])

In [83]:
param_grid = {
    "clf__depth": [5, 6, 7],
    "clf__iterations": [400, 500, 600],
    "clf__l2_leaf_reg": [1, 2, 3],
    "clf__learning_rate": [0.005, 0.01, 0.02],
    "clf__class_weights": [[1, 5], [1, 8], [1, 10]],
}

In [84]:
search = GridSearchCV(
    estimator=best_pipe,
    param_grid=param_grid,
    scoring="recall",
    cv=cv,
    n_jobs=-1,
    verbose=1
)

In [85]:
# Training + MLflow logging loop
with mlflow.start_run(run_name="CatBoost_grid_tuned_OHE"):
    mlflow.sklearn.autolog(log_models=False)

    # Fit GridSearch
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # Predictions
    y_pred = best_model.predict(X_test)

    # Metrics
    recall = recall_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    # Log metrics + params
    mlflow.log_metric("test_recall", recall)
    mlflow.log_params(search.best_params_)

    # Plot confusion matrix
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix - CatBoost (Grid Search)")
    plt.tight_layout()
    plt.savefig("cm_catboost_grid.png", dpi=300, bbox_inches="tight")
    mlflow.log_artifact("cm_catboost_grid.png")
    plt.close()

    # ‚úÖ Log the full pipeline
    mlflow.sklearn.log_model(best_model, artifact_path="model")

    print(f"‚úÖ Best CV recall: {search.best_score_:.4f}")
    print(f"‚úÖ Test recall: {recall:.4f}")
    print("Best params:", search.best_params_)

Fitting 5 folds for each of 243 candidates, totalling 1215 fits


2025/10/04 12:11:18 INFO mlflow.sklearn.utils: Logging the 5 best runs, 238 runs will be omitted.


‚úÖ Best CV recall: 0.7776
‚úÖ Test recall: 0.8034
Best params: {'clf__class_weights': [1, 10], 'clf__depth': 5, 'clf__iterations': 400, 'clf__l2_leaf_reg': 1, 'clf__learning_rate': 0.005}
üèÉ View run CatBoost_grid_tuned_OHE at: http://127.0.0.1:8080/#/experiments/473699037310066725/runs/e427f6b46f1641a99f1005c284008a10
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/473699037310066725


Export Pipeline and Register Tuned Model in MLflow Model Registry

In [None]:
# Save pipeline locally for deployment
local_path = "catboost_best_pipeline.pkl"
joblib.dump(best_model, local_path)
mlflow.log_artifact(local_path)

In [87]:
from mlflow.tracking import MlflowClient

# Get current run ID from active MLflow run
run_id = mlflow.active_run().info.run_id

# Initialize client
client = MlflowClient()

# Choose a registry name (must stay consistent across versions)
model_name = "BankTermPredict_CatBoost"

# Register the model artifact from this run
model_uri = f"runs:/{run_id}/model"

registered_model = client.create_registered_model(model_name)
version = client.create_model_version(
    name=model_name,
    source=model_uri,
    run_id=run_id
)

print(f"‚úÖ Model registered as '{model_name}' version {version.version}")

2025/10/04 12:46:28 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: BankTermPredict_CatBoost, version 1


‚úÖ Model registered as 'BankTermPredict_CatBoost' version 1
