In [1]:
import numpy as np
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
#from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

**Sample data**

In [3]:
# Step 1: Synthesize an imbalanced binary classification dataset
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],  # class proportions: 90% class 0, 10% class 1
    flip_y=0,            # do not randomly flip any labels
    random_state=42,
)

# Show how many samples ended up in each class
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100], dtype=int64))

In [5]:
# Hold out 30% of the rows for evaluation; stratifying on y keeps the class
# ratio the same in the training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

**Model training and registering to MLflow**

**Model 1: Logistic Regression**

In [13]:
# Hyperparameters for the logistic-regression baseline; this dict is also what
# gets logged to MLflow for the run.
# "multi_class" is intentionally not set: the argument is deprecated in recent
# scikit-learn releases, and "auto" (the value previously passed) was already
# the default, so the fitted model is unchanged.
params = {
    "solver": "liblinear",
    "max_iter": 100000,
    "random_state": 42,
}

lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predictions on the held-out split, used by the evaluation cells below
y_pred = lr.predict(X_test)


In [15]:
# Plain-text precision/recall/F1 table for the logistic-regression predictions
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.96      0.95       270
           1       0.60      0.50      0.55        30

    accuracy                           0.92       300
   macro avg       0.77      0.73      0.75       300
weighted avg       0.91      0.92      0.91       300



In [17]:
# Same report as a nested dict so individual scores can be looked up by key
report_dict = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    output_dict=True,
)
report_dict

{'0': {'precision': 0.9454545454545454,
  'recall': 0.9629629629629629,
  'f1-score': 0.9541284403669725,
  'support': 270.0},
 '1': {'precision': 0.6,
  'recall': 0.5,
  'f1-score': 0.5454545454545454,
  'support': 30.0},
 'accuracy': 0.9166666666666666,
 'macro avg': {'precision': 0.7727272727272727,
  'recall': 0.7314814814814814,
  'f1-score': 0.749791492910759,
  'support': 300.0},
 'weighted avg': {'precision': 0.9109090909090909,
  'recall': 0.9166666666666666,
  'f1-score': 0.91326105087573,
  'support': 300.0}}

In [19]:
# Point MLflow at the tracking server *before* selecting the experiment, so the
# experiment is looked up (or created) on that server rather than in the
# default local store.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("First experiment")

<Experiment: artifact_location='file:///C:/Users/hp/MLFlow/mlruns/519041328492156971', creation_time=1766729950664, experiment_id='519041328492156971', last_update_time=1766729950664, lifecycle_stage='active', name='First experiment', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [21]:
# Use the MLflow tracking server listening locally on port 5000
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")

In [23]:
# Record the logistic-regression run: hyperparameters, evaluation metrics, and
# the fitted model (also registered under the name "Logistic Regression").
with mlflow.start_run(run_name="logistic_regression_v2"):
    mlflow.log_params(params)

    # Metric names follow "<metric>_class_<label>"; the report itself stores
    # the F1 value under the key "f1-score".
    mlflow.log_metrics(
        {
            "accuracy": report_dict["accuracy"],
            **{
                f"{metric_name}_class_{label}": report_dict[label][report_key]
                for metric_name, report_key in (
                    ("recall", "recall"),
                    ("precision", "precision"),
                    ("f1_score", "f1-score"),
                )
                for label in ("0", "1")
            },
        }
    )

    mlflow.sklearn.log_model(
        lr,
        artifact_path="model",
        registered_model_name="Logistic Regression",
    )

Registered model 'Logistic Regression' already exists. Creating a new version of this model...
2025/12/30 11:40:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Logistic Regression, version 2
Created version '2' of model 'Logistic Regression'.


🏃 View run logistic_regression_v2 at: http://127.0.0.1:5000/#/experiments/519041328492156971/runs/f886bf6ffc3e4c0d83d9fb28cbf20ef0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/519041328492156971


**Model 2: Random Forest**

In [55]:
# Hyperparameters for the random-forest model; this dict is also what gets
# logged to MLflow for the run.
params2 = {
    "n_estimators": 20,
    "max_depth": 5,
    # Seed the forest so the fit, and therefore the metrics logged for it, are
    # reproducible -- consistent with the seeded data generation, split, and
    # logistic-regression cells.
    "random_state": 42,
}

rf = RandomForestClassifier(**params2)
rf.fit(X_train, y_train)

# Predictions on the held-out split, used by the evaluation cells below
y_pred2 = rf.predict(X_test)

In [57]:
# Print the plain-text report directly instead of binding it to `report2`:
# that name is used for the dict form of the report, and rebinding it to a
# string here would break the MLflow logging cell if this cell is re-run.
print(classification_report(y_test, y_pred2))

              precision    recall  f1-score   support

           0       0.97      1.00      0.99       270
           1       0.96      0.77      0.85        30

    accuracy                           0.97       300
   macro avg       0.97      0.88      0.92       300
weighted avg       0.97      0.97      0.97       300



In [59]:
# Dict form of the random-forest report so individual scores can be looked up by key
report2 = classification_report(
    y_true=y_test,
    y_pred=y_pred2,
    output_dict=True,
)
report2

{'0': {'precision': 0.9746376811594203,
  'recall': 0.9962962962962963,
  'f1-score': 0.9853479853479854,
  'support': 270.0},
 '1': {'precision': 0.9583333333333334,
  'recall': 0.7666666666666667,
  'f1-score': 0.8518518518518519,
  'support': 30.0},
 'accuracy': 0.9733333333333334,
 'macro avg': {'precision': 0.9664855072463768,
  'recall': 0.8814814814814815,
  'f1-score': 0.9185999185999186,
  'support': 300.0},
 'weighted avg': {'precision': 0.9730072463768117,
  'recall': 0.9733333333333334,
  'f1-score': 0.9719983719983721,
  'support': 300.0}}

In [63]:
# Record the random-forest run: hyperparameters, evaluation metrics, and the
# fitted model (also registered under the name "Random Forest").
with mlflow.start_run(run_name="random_forest_v1"):
    mlflow.log_params(params2)

    # Metric names follow "<metric>_class_<label>"; the report itself stores
    # the F1 value under the key "f1-score".
    mlflow.log_metrics(
        {
            "accuracy": report2["accuracy"],
            **{
                f"{metric_name}_class_{label}": report2[label][report_key]
                for metric_name, report_key in (
                    ("recall", "recall"),
                    ("precision", "precision"),
                    ("f1_score", "f1-score"),
                )
                for label in ("0", "1")
            },
        }
    )

    mlflow.sklearn.log_model(
        rf,
        artifact_path="model",
        registered_model_name="Random Forest",
    )

Successfully registered model 'Random Forest'.
2025/12/26 14:37:56 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random Forest, version 1
Created version '1' of model 'Random Forest'.


🏃 View run random_forest_v1 at: http://127.0.0.1:5000/#/experiments/519041328492156971/runs/14a1bf3360b24d67a5854eb8b231867e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/519041328492156971


**Fetch through MLflow client**

In [26]:
# Create a client with no explicit tracking URI.
# NOTE(review): presumably it picks up the URI set earlier via
# mlflow.set_tracking_uri -- confirm when running on a fresh kernel.
client = MlflowClient()
# List the models currently present in the registry
client.search_registered_models()

[<RegisteredModel: aliases={}, creation_timestamp=1766739921147, deployment_job_id='', deployment_job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', description='', last_updated_timestamp=1767075003897, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1767075003897, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1767075003897, metrics=None, model_id=None, name='Logistic Regression', params=None, run_id='f886bf6ffc3e4c0d83d9fb28cbf20ef0', run_link='', source='models:/m-21a225d1b14444779d9d664382257719', status='READY', status_message=None, tags={}, user_id='', version='2'>], name='Logistic Regression', tags={}>,
 <RegisteredModel: aliases={}, creation_timestamp=1766740075907, deployment_job_id='', deployment_job_state='DEPLOYMENT_JOB_CONNECTION_STATE_

In [32]:
model_name = "Random Forest"
# MlflowClient.get_latest_versions() is deprecated together with registry
# stages, so query the model's versions directly instead. Versions are sorted
# numerically (they may come back as strings) and only the newest is kept, so
# `latest_versions[0]` is the most recent version.
latest_versions = sorted(
    client.search_model_versions(f"name='{model_name}'"),
    key=lambda mv: int(mv.version),
    reverse=True,
)[:1]
latest_versions

[<ModelVersion: aliases=[], creation_timestamp=1766740075974, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1766740075974, metrics=None, model_id=None, name='Random Forest', params=None, run_id='14a1bf3360b24d67a5854eb8b231867e', run_link='', source='models:/m-d9b9aca0a91c4ecd9ef8b901a443fed1', status='READY', status_message=None, tags={}, user_id='', version='1'>]

In [34]:
# Take the version identifier of the first entry returned for this model
latest_version = latest_versions[0].version
latest_version

'1'

In [36]:
# Registry URI built from the registered model name and the version chosen above
loaded_model_uri = f"models:/{model_name}/{latest_version}"
# Fetch that version back as a scikit-learn estimator
loaded_rf = mlflow.sklearn.load_model(model_uri=loaded_model_uri)

In [75]:
# Score the held-out split with the model loaded back from the registry
ypred_load = loaded_rf.predict(X_test)
ypred_load

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0])

In [38]:
model_name = "Logistic Regression"
# MlflowClient.get_latest_versions() is deprecated together with registry
# stages, so query the model's versions directly instead. Versions are sorted
# numerically (they may come back as strings) and only the newest is kept, so
# `latest_versions[0]` is the most recent version.
latest_versions = sorted(
    client.search_model_versions(f"name='{model_name}'"),
    key=lambda mv: int(mv.version),
    reverse=True,
)[:1]
latest_versions

[<ModelVersion: aliases=[], creation_timestamp=1767075003897, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1767075003897, metrics=None, model_id=None, name='Logistic Regression', params=None, run_id='f886bf6ffc3e4c0d83d9fb28cbf20ef0', run_link='', source='models:/m-21a225d1b14444779d9d664382257719', status='READY', status_message=None, tags={}, user_id='', version='2'>]