In [4]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Synthetic, imbalanced binary-classification data set used throughout the notebook.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],  # requested class proportions for class 0 / class 1
    flip_y=0,            # do not randomly flip any labels
    random_state=42,
)

In [6]:
# Sanity check: feature matrix dimensions (rows = samples, columns = features).
X.shape

(1000, 10)

In [7]:
# Sanity check: one label per sample.
y.shape

(1000,)

In [8]:
# Distinct labels and how many samples carry each one (shows the class imbalance).
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100]))

In [9]:
# Hold out 30% of the samples for evaluation; stratify so both splits keep
# the same class proportions as the full data set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [10]:
# Training features: the 70% of samples not held out by the split.
X_train.shape

(700, 10)

In [11]:
# Test features: the 30% of samples held out by the split.
X_test.shape

(300, 10)

In [12]:
# Training labels: should have one entry per row of X_train.
y_train.shape

(700,)

In [13]:
# Test labels: should have one entry per row of X_test.
y_test.shape

(300,)

####Handle Imbalance###

In [14]:
from imblearn.combine import SMOTETomek

# Resample the training split only; the test split is left untouched so it
# can still be used for evaluation on the original class distribution.
smt = SMOTETomek(random_state=42)
X_train_res, y_train_res = smt.fit_resample(X_train, y_train)
# Class counts after resampling.
np.unique(y_train_res, return_counts=True)

(array([0, 1]), array([619, 619]))

In [15]:
# Experiment definitions. Each entry is a 5-tuple:
#   (run name, hyper-parameters, unfitted estimator, (X_train, y_train), (X_test, y_test))
# The hyper-parameters are applied with `set_params` by the training loop and are
# also what gets logged to MLflow, so everything that affects the result belongs
# in the dict rather than in the constructor call.
models = [
    (
        "Logistic Regression",
        {"C": 1, "solver": 'liblinear'},
        LogisticRegression(),
        (X_train, y_train),
        (X_test, y_test),
    ),
    (
        "Random Forest",
        # random_state pins the bootstrap / feature sampling so the logged
        # metrics are reproducible across "Restart & Run All".
        {"n_estimators": 30, "max_depth": 3, "random_state": 42},
        RandomForestClassifier(),
        (X_train, y_train),
        (X_test, y_test),
    ),
    (
        "XGBClassifier",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (X_train, y_train),
        (X_test, y_test),
    ),
    (
        # Same estimator settings, but trained on the SMOTETomek-resampled split.
        "XGBClassifier with SMOTE",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (X_train_res, y_train_res),
        (X_test, y_test),
    ),
]

In [16]:
# Fit every configured model and collect one classification report per model,
# in the same order as `models` (the logging cell indexes `reports` by position).
reports = []

for model_name, params, model, train_set, test_set in models:
    # Unpack into loop-local names. Rebinding the notebook-level
    # X_train / y_train / X_test / y_test here would leave X_train pointing at
    # the resampled data after the loop, so re-running the `models` cell would
    # silently train every model on the SMOTE set.
    X_fit, y_fit = train_set
    X_eval, y_eval = test_set

    model.set_params(**params)
    model.fit(X_fit, y_fit)
    y_pred = model.predict(X_eval)
    # output_dict=True returns a nested dict instead of the formatted text report.
    report = classification_report(y_eval, y_pred, output_dict=True)
    reports.append(report)

In [22]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost
# Show which tracking backend MLflow is pointing at before it is reconfigured below.
print("Tracking URI:", mlflow.get_tracking_uri())

Tracking URI: file:///C:/code/ML/mlops_tools_modelregistry_resources/mlruns


####Initialize MLFlow ###

In [34]:
# Point MLflow at the tracking server *before* selecting the experiment.
# set_experiment() looks up / creates the experiment on whatever backend is
# active at call time, so calling it first would create the experiment in the
# default local store while the runs below go to the HTTP server.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("My MLFLOW Experiments Latest")

# One MLflow run per trained model: parameters, headline metrics, model artifact.
for i, element in enumerate(models):
    model_name, params, model = element[:3]
    report = reports[i]  # reports[i] was produced from models[i] by the training loop

    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(params)
        mlflow.log_metrics({
            'accuracy': report['accuracy'],
            'recall_class_1': report['1']['recall'],
            'recall_class_0': report['0']['recall'],
            'f1_score_macro': report['macro avg']['f1-score']
        })

        # Use the flavor that matches the estimator so it can be reloaded
        # with the corresponding mlflow.<flavor>.load_model later.
        if "XGB" in model_name:
            mlflow.xgboost.log_model(model, name="model")
        else:
            mlflow.sklearn.log_model(model, name="model")

2025/12/01 15:17:36 INFO mlflow.tracking.fluent: Experiment with name 'My MLFLOW Experiments Latest' does not exist. Creating a new experiment.


🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/868758640585974189/runs/940f8c7b31fe4ca093b96d0020c3e005
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/868758640585974189




🏃 View run Random Forest at: http://127.0.0.1:5000/#/experiments/868758640585974189/runs/cd8ae649282b4a5f9002ce8a86eb3f1c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/868758640585974189




🏃 View run XGBClassifier at: http://127.0.0.1:5000/#/experiments/868758640585974189/runs/d887266e1225450da25a99dddd77e84d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/868758640585974189




🏃 View run XGBClassifier with SMOTE at: http://127.0.0.1:5000/#/experiments/868758640585974189/runs/02c2522345c34d50b61a45a64bafcc7a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/868758640585974189


### Register the model with the highest recall, which is XGB with SMOTE ###

In [35]:
# Register the logged model artifact of one run in the MLflow Model Registry.
model_name = "XGBClassifier with SMOTE"
# The run ID is the last path segment of the "View run ..." link printed when
# the run was logged. Strip whitespace picked up when pasting, and fail early
# on an empty entry instead of sending a malformed "runs:/" URI to the registry.
run_id = input("Enter Run ID:").strip()
if not run_id:
    raise ValueError("A run ID is required to register the model.")
# "model" is the artifact name used by log_model in the logging cell.
model_uri = f"runs:/{run_id}/model"
result = mlflow.register_model(
    model_uri, model_name
)

Enter Run ID: 02c2522345c34d50b61a45a64bafcc7a


Registered model 'XGBClassifier with SMOTE' already exists. Creating a new version of this model...
2025/12/01 15:18:42 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGBClassifier with SMOTE, version 2
Created version '2' of model 'XGBClassifier with SMOTE'.


#### Load the model that was registered ###

In [41]:
# Load the registered model through its "challenger" alias and smoke-test it.
# Use the version created by the registration cell rather than a hard-coded
# number: every re-registration creates a new version.
model_version = result.version
# Assign the alias in code so this cell does not depend on a manual step in the
# MLflow UI; re-assigning an existing alias simply moves it to this version.
mlflow.MlflowClient().set_registered_model_alias(model_name, "challenger", model_version)

model_uri = f"models:/{model_name}@challenger"
loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(X_test)
y_pred[:4]


array([0, 0, 0, 0])

####Transitioning from dev to production ###

In [42]:
# Promote the "challenger" version by copying it into the production registered model.
dev_model_uri = f"models:/{model_name}@challenger"
# NOTE(review): "Anamoly" looks like a misspelling of "Anomaly"; the name is kept
# as-is because it identifies the registered model in the registry.
prod_model = "Anamoly detection"

client = mlflow.MlflowClient()
prod_version = client.copy_model_version(src_model_uri=dev_model_uri, dst_name=prod_model)
# The copy is created without aliases, so point "champion" at it explicitly;
# the production test cell loads the model via f"models:/{prod_model}@champion".
client.set_registered_model_alias(prod_model, "champion", prod_version.version)
prod_version

Successfully registered model 'Anamoly detection'.
Copied version '2' of model 'XGBClassifier with SMOTE' to version '1' of model 'Anamoly detection'.


<ModelVersion: aliases=[], creation_timestamp=1764584020790, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1764584020790, metrics=None, model_id=None, name='Anamoly detection', params=None, run_id='02c2522345c34d50b61a45a64bafcc7a', run_link='', source='models:/XGBClassifier with SMOTE/2', status='READY', status_message=None, tags={}, user_id='', version='1'>

###Testing using prod model###

In [43]:
# Resolve the production model through its "champion" alias, then score the
# held-out features and show the first few predictions as a smoke test.
model_uri = "models:/{}@champion".format(prod_model)
loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(X_test)
y_pred[:4]

array([0, 0, 0, 0])