In [1]:
from mlflow.tracking import MlflowClient
from mlflow.entities import Metric, Param, RunTag, Run
import mlflow

In [2]:
# Initialize the MLflow client against the local SQLite-backed store.
# The relative URI is resolved from the notebook's working directory.
TRACKING_URI = 'sqlite:///mlflow.db'
client = MlflowClient(tracking_uri=TRACKING_URI)

2026/01/06 18:25:16 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/06 18:25:16 INFO mlflow.store.db.utils: Updating database tables
2026/01/06 18:25:16 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/06 18:25:16 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/06 18:25:17 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/06 18:25:17 INFO alembic.runtime.migration: Will assume non-transactional DDL.


In [3]:
# Enumerate the experiments known to the tracking store: one summary line each,
# followed by the full list as the cell's rich output.
exps = client.search_experiments()
summary_lines = [f"Experiment ID: {e.experiment_id}, Name: {e.name}" for e in exps]
for line in summary_lines:
    print(line)

exps

Experiment ID: 4, Name: Testing_exp
Experiment ID: 3, Name: Diabetes Prediction - Stacking
Experiment ID: 2, Name: Diabetes Prediction K-Folds
Experiment ID: 1, Name: Diabetes Prediction


[<Experiment: artifact_location='file:///d:/Project/Diabetes-mlflow/notebooks/mlruns/Testing_exp', creation_time=1767460376889, experiment_id='4', last_update_time=1767460376889, lifecycle_stage='active', name='Testing_exp', tags={'mlflow.experimentKind': 'custom_model_development',
  'owner': 'base_user',
  'version': '1.0'}>,
 <Experiment: artifact_location='file:///d:/Project/Diabetes-mlflow/notebooks/mlruns/3', creation_time=1766776529473, experiment_id='3', last_update_time=1766776529473, lifecycle_stage='active', name='Diabetes Prediction - Stacking', tags={'mlflow.experimentKind': 'custom_model_development'}>,
 <Experiment: artifact_location='file:///d:/Project/Diabetes-mlflow/notebooks/mlruns/2', creation_time=1766772444058, experiment_id='2', last_update_time=1766772444058, lifecycle_stage='active', name='Diabetes Prediction K-Folds', tags={'mlflow.experimentKind': 'custom_model_development'}>,
 <Experiment: artifact_location='file:///d:/Project/Diabetes-mlflow/notebooks/mlrun

In [4]:
# Get the candidate runs from the Stacking experiment.
EXPERIMENT_NAME = "Diabetes Prediction - Stacking"

experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
# get_experiment_by_name returns None (it does not raise) when no experiment has
# this name, so fail here with a clear message rather than with an AttributeError
# on the attribute access below.
if experiment is None:
    raise ValueError(f"Experiment {EXPERIMENT_NAME!r} not found in the tracking store.")
experiment_id = experiment.experiment_id

# Only runs tagged with training_type='full_data', best overall_auc first.
runs = client.search_runs(
    experiment_ids=[experiment_id],
    filter_string="params.training_type = 'full_data'",
    order_by=["metrics.overall_auc DESC"],
)

if not runs:
    raise ValueError("No runs found.")

# Print a short report per matching run so the selection below can be checked by eye.
for run in runs:
    print(f"Run ID: {run.info.run_id}")
    print(f"AUC: {run.data.metrics.get('overall_auc')}")
    print(f"Accuracy: {run.data.metrics.get('overall_accuracy')}")
    print(f"Parameters: {run.data.params}")
    print("----" * 20)

Run ID: 6b167471ff5c4cdb880b999e169c5c05
AUC: 0.8375720765976296
Accuracy: 0.8088706793434698
Parameters: {'meta_learner': 'LogisticRegression', 'solver': 'lbfgs', 'max_iter': '1000', 'base_models': 'xgb+lgbm+catboost', 'training_type': 'full_data'}
--------------------------------------------------------------------------------


In [5]:
# Pick the top run. `runs` was requested ordered by overall_auc descending, so the
# first element is the best one by AUC. The variable is called `best_acc` even
# though the ranking metric is AUC; the name is kept because later cells use it.
best_acc = runs[0]
best_info = best_acc.info
best_auc = best_acc.data.metrics.get('overall_auc')
print(f"Best Run ID: {best_info.run_id}")
print(f"AUC: {best_auc}")

best_info

Best Run ID: 6b167471ff5c4cdb880b999e169c5c05
AUC: 0.8375720765976296


<RunInfo: artifact_uri='file:///d:/Project/Diabetes-mlflow/notebooks/mlruns/3/6b167471ff5c4cdb880b999e169c5c05/artifacts', end_time=1766776558532, experiment_id='3', lifecycle_stage='active', run_id='6b167471ff5c4cdb880b999e169c5c05', run_name='meta_learner_final', start_time=1766776549882, status='FINISHED', user_id='MattNg'>

In [6]:
# Register the model name in the Model Registry (idempotent across notebook re-runs).
from mlflow.exceptions import MlflowException

MODEL_NAME = "Catboost Diabetes Model"

try:
    client.create_registered_model(MODEL_NAME)
except MlflowException as exc:
    # A name that is already registered is expected when the notebook is re-run and
    # is safe to ignore. Any other failure (database error, invalid name, ...) must
    # surface here instead of being silently swallowed.
    if exc.error_code != "RESOURCE_ALREADY_EXISTS":
        raise


2026/01/06 18:25:17 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/06 18:25:17 INFO mlflow.store.db.utils: Updating database tables
2026/01/06 18:25:17 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/06 18:25:17 INFO alembic.runtime.migration: Will assume non-transactional DDL.


In [7]:
# Register a new version of MODEL_NAME that points at the selected run's
# 'meta_learner_final' model.
run_id = best_acc.info.run_id
model_uri = f"runs:/{run_id}/meta_learner_final"

version_request = dict(name=MODEL_NAME, source=model_uri, run_id=run_id)
model_version = client.create_model_version(**version_request)

print("Created model version:", model_version.version)


Created model version: 5


In [8]:
# Move the freshly created version into the Staging stage. With
# archive_existing_versions=True, versions already in Staging are archived.
# NOTE(review): the stray `client.transition_model_version_stage(` line in this
# cell's output looks like the tail of a deprecation warning - confirm whether
# registry stages should be replaced by model version aliases.
new_version = model_version.version
client.transition_model_version_stage(
    name=MODEL_NAME,
    version=new_version,
    stage="Staging",
    archive_existing_versions=True,
)
print(f"Model version {new_version} transitioned to Staging.")

  client.transition_model_version_stage(


Model version 5 transitioned to Staging.


In [9]:
# Promote the same version to the Production stage.
# NOTE(review): archive_existing_versions is False here (the Staging transition
# uses True), so versions already in Production are presumably left in that
# stage - confirm this is intended.
promotion_request = {
    "name": MODEL_NAME,
    "version": model_version.version,
    "stage": "Production",
    "archive_existing_versions": False,
}
client.transition_model_version_stage(**promotion_request)
print(f"Model version {model_version.version} transitioned to Production.")

Model version 5 transitioned to Production.


  client.transition_model_version_stage(


In [10]:
# Inspect what the selected run stored at the root of its artifact directory.
artifacts = client.list_artifacts(run_id=run_id)

print(artifacts)

[<FileInfo: file_size=20781419, is_dir=False, path='meta_features.csv'>]


In [11]:
# Load the Production model from the registry and evaluate it on the stacked
# meta features that were logged with the selected run.
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

# Columns of meta_features.csv: the base-model predictions fed to the meta
# learner, plus the ground-truth label.
META_FEATURE_COLUMNS = ['xgb_pred', 'lgbm_pred', 'catboost_pred']
LABEL_COLUMN = 'true_label'
N_PREVIEW_SAMPLES = 20  # size of the quick sanity-check slice


def _print_banner(title: str, width: int = 50) -> None:
    """Print a blank line, then `title` framed between two rules of '=' characters."""
    rule = "=" * width
    print("\n" + rule)
    print(title)
    print(rule)


# Set tracking URI so the fluent API (mlflow.sklearn) talks to the same store as `client`
mlflow.set_tracking_uri('sqlite:///mlflow.db')

# Load real data
df1 = pd.read_csv('../data/db1.csv')
df2 = pd.read_csv('../data/db2.csv')

# Preprocess: fold label value 2 into 1 in the first dataset, then stack both datasets
df1['Diabetes_binary'] = df1['Diabetes_binary'].replace({2: 1})
df = pd.concat([df1, df2], ignore_index=True)

# NOTE(review): X and y are not used anywhere else in this cell - the evaluation
# below runs on the stacked meta features, not on the raw features. They are kept
# in case other cells rely on them; drop this raw-data section if nothing does.
X = df.drop('Diabetes_binary', axis=1)
y = df['Diabetes_binary']

# Load meta_features.csv from artifact (logged in main.ipynb)
artifact_path = client.download_artifacts(run_id, "meta_features.csv")
meta_features_df = pd.read_csv(artifact_path)

# Fail fast, with a readable message, if the artifact does not have the expected layout.
missing_columns = set(META_FEATURE_COLUMNS + [LABEL_COLUMN]) - set(meta_features_df.columns)
if missing_columns:
    raise KeyError(f"meta_features.csv is missing expected columns: {sorted(missing_columns)}")

print(f"Meta features shape: {meta_features_df.shape}")
print(meta_features_df.head())

# Load model
model_sklearn = mlflow.sklearn.load_model(f"models:/{MODEL_NAME}/Production")

# Build the full feature matrix / label vector once; the preview is a slice of it.
X_full = meta_features_df[META_FEATURE_COLUMNS].values
y_full = meta_features_df[LABEL_COLUMN].values

# Quick sanity check on the first few rows
X_test = X_full[:N_PREVIEW_SAMPLES]
y_test = y_full[:N_PREVIEW_SAMPLES]

preds = model_sklearn.predict(X_test)
proba = model_sklearn.predict_proba(X_test)[:, 1]  # second predict_proba column

# Report the number of rows actually evaluated (the artifact could hold fewer
# than N_PREVIEW_SAMPLES rows).
_print_banner(f"TEST RESULTS ({len(y_test)} samples)")
print(f"True Labels:   {y_test.astype(int)}")
print(f"Predictions:   {preds.astype(int)}")
print(f"Probabilities: {np.round(proba, 3)}")
print(f"\nAccuracy: {(preds == y_test).mean():.2%}")

# Evaluate on every row of the meta features.
# NOTE(review): these meta features come from the same run that was filtered on
# training_type='full_data', so the metrics below are presumably in-sample -
# confirm before reporting them as generalization performance.
preds_full = model_sklearn.predict(X_full)
proba_full = model_sklearn.predict_proba(X_full)[:, 1]

_print_banner("FULL DATA EVALUATION")
print(f"Accuracy: {accuracy_score(y_full, preds_full):.4f}")
print(f"ROC-AUC:  {roc_auc_score(y_full, proba_full):.4f}")
print("\nClassification Report:")
print(classification_report(y_full, preds_full))

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 331.07it/s]


Meta features shape: (324372, 4)
   xgb_pred  lgbm_pred  catboost_pred  true_label
0  0.845282   0.848020       0.799341         0.0
1  0.038700   0.039767       0.054829         0.0
2  0.775769   0.637708       0.806338         0.0
3  0.275906   0.328539       0.254358         0.0
4  0.314059   0.271964       0.285841         0.0


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 625.31it/s] 



TEST RESULTS (20 samples)
True Labels:   [0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0]
Predictions:   [1 0 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0]
Probabilities: [0.792 0.057 0.793 0.156 0.182 0.165 0.236 0.34  0.706 0.074 0.173 0.694
 0.069 0.523 0.199 0.068 0.357 0.058 0.044 0.057]

Accuracy: 75.00%

FULL DATA EVALUATION
Accuracy: 0.8089
ROC-AUC:  0.8376

Classification Report:
              precision    recall  f1-score   support

         0.0       0.84      0.92      0.88    249049
         1.0       0.63      0.43      0.51     75323

    accuracy                           0.81    324372
   macro avg       0.74      0.68      0.70    324372
weighted avg       0.79      0.81      0.80    324372

