In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Step 1: Build a synthetic binary classification problem with a 90/10
# class imbalance. All generator settings are gathered in one mapping so
# they can be read (and tuned) at a glance.
dataset_kwargs = dict(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)
X, y = make_classification(**dataset_kwargs)

# Show each class label together with its sample count.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100]))

In [3]:
# Hold out 30% of the samples for evaluation. The split is stratified on y
# and seeded so it is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [5]:
from imblearn.combine import SMOTETomek
import numpy as np

# Resample the training split only; the test split is never passed to the
# resampler, so evaluation still happens on the original class distribution.
smt = SMOTETomek(random_state=42)
resampled_pair = smt.fit_resample(X_train, y_train)
X_train_res, y_train_res = resampled_pair

# Class counts after resampling.
np.unique(y_train_res, return_counts=True)


(array([0, 1]), array([619, 619]))

In [6]:
# Seed shared by every estimator so repeated runs produce the same fitted
# models (the dataset, the split and the resampling above are seeded with the
# same value). It is passed to the constructors rather than added to the
# hyper-parameter dicts, so the parameters applied via set_params are unchanged.
MODEL_SEED = 42

# Each entry is a 5-tuple:
#   (run name,
#    hyper-parameters applied later with set_params,
#    estimator instance,
#    (X, y) used for fitting,
#    (X, y) used for evaluation)
models = [
    (
        "Logistic Regression",
        {"C": 1, "solver": 'liblinear'},
        LogisticRegression(random_state=MODEL_SEED),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "Random Forest",
        {"n_estimators": 30, "max_depth": 3},
        RandomForestClassifier(random_state=MODEL_SEED),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(random_state=MODEL_SEED),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        # Same estimator configuration as above, but fitted on the resampled
        # training data; evaluation still uses the untouched test split.
        "XGBClassifier With SMOTE",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(random_state=MODEL_SEED),
        (X_train_res, y_train_res),
        (X_test, y_test)
    )
]

In [7]:
# One classification report (as a nested dict) per entry of `models`,
# in the same order.
reports = []

for model_name, params, model, train_set, test_set in models:
    # Unpack into loop-local names. Binding these to X_train / y_train /
    # X_test / y_test would overwrite the notebook-level splits: after the
    # last iteration X_train would be the resampled training data, and
    # re-running the `models` cell would then silently train every model on it.
    X_fit, y_fit = train_set
    X_eval, y_eval = test_set

    model.set_params(**params)
    model.fit(X_fit, y_fit)
    y_pred = model.predict(X_eval)

    # output_dict=True returns the report as a dict so individual metrics
    # can be looked up by key later.
    report = classification_report(y_eval, y_pred, output_dict=True)
    reports.append(report)

In [8]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost

In [11]:
import mlflow
from mlflow.entities import ViewType
from mlflow.tracking import MlflowClient

# --------------------------------------------------------
# 1. Set tracking URI FIRST
# --------------------------------------------------------
TRACKING_URI = "http://localhost:5000"
mlflow.set_tracking_uri(TRACKING_URI)

client = MlflowClient()
experiment_name = "Anomaly Detection"

# --------------------------------------------------------
# 2. Restore experiment if deleted / or create new one
# --------------------------------------------------------
# Search active AND deleted experiments. The symbolic constant matters: the
# raw integer 2 is ViewType.DELETED_ONLY, which never matches an active
# experiment and makes the create_experiment call below fail on every re-run
# once the experiment exists.
all_exps = client.search_experiments(view_type=ViewType.ALL)

experiment_id = None
for e in all_exps:
    if e.name == experiment_name:
        if e.lifecycle_stage == "deleted":
            print("Restoring deleted experiment...")
            client.restore_experiment(e.experiment_id)
        experiment_id = e.experiment_id
        break

# If experiment was not found at all, create it
if experiment_id is None:
    experiment_id = client.create_experiment(experiment_name)
    print(f"Created new experiment with ID: {experiment_id}")

# Set active experiment
mlflow.set_experiment(experiment_name)

# --------------------------------------------------------
# 3. Loop through models and log everything
# --------------------------------------------------------
# Reports are matched to models by position, so a stale `reports` list
# (e.g. `models` edited without re-running training) must not be logged.
if len(reports) != len(models):
    raise ValueError(
        f"Expected one report per model, got {len(reports)} reports "
        f"for {len(models)} models; re-run the training cell."
    )

for (model_name, params, model, *_), report in zip(models, reports):

    with mlflow.start_run(run_name=model_name, experiment_id=experiment_id):

        # Log params
        mlflow.log_params(params)

        # Log metrics
        mlflow.log_metrics({
            'accuracy': report['accuracy'],
            'recall_class_1': report['1']['recall'],
            'recall_class_0': report['0']['recall'],
            'f1_score_macro': report['macro avg']['f1-score']
        })

        # Log model with the flavor that matches the estimator type
        if "XGB" in model_name:
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")

print(f"✅ Logging complete! Open {TRACKING_URI} to view runs.")


Restoring deleted experiment...




🏃 View run Logistic Regression at: http://localhost:5000/#/experiments/753153319883687930/runs/7243c63e184b4f3996443db699466d8d
🧪 View experiment at: http://localhost:5000/#/experiments/753153319883687930




🏃 View run Random Forest at: http://localhost:5000/#/experiments/753153319883687930/runs/eeb6a1d4d6fb4502a27fc5d9829aaf78
🧪 View experiment at: http://localhost:5000/#/experiments/753153319883687930




🏃 View run XGBClassifier at: http://localhost:5000/#/experiments/753153319883687930/runs/1358a58d80e240cbac26d62b9055a383
🧪 View experiment at: http://localhost:5000/#/experiments/753153319883687930




🏃 View run XGBClassifier With SMOTE at: http://localhost:5000/#/experiments/753153319883687930/runs/268f4073e8b4429cb45f2f2c74161e47
🧪 View experiment at: http://localhost:5000/#/experiments/753153319883687930
✅ Logging complete! Open http://localhost:5000 to view runs.


### Register the Model

In [17]:
model_name = "XGB-Smote"        # name in the registry

# The run ID is pasted by hand, so drop surrounding whitespace and reject an
# empty answer before it is turned into a model URI.
run_id = input("Enter Run ID: ").strip()   # use a run that FINISHED successfully
if not run_id:
    raise ValueError("A run ID is required to register a model.")

# Enforce the "FINISHED" requirement instead of only documenting it:
# get_run raises if the ID is unknown to the tracking server, and the status
# check refuses runs that failed or are still in progress.
run = mlflow.get_run(run_id)
if run.info.status != "FINISHED":
    raise ValueError(
        f"Run {run_id} has status {run.info.status!r}; "
        "only FINISHED runs should be registered."
    )

# "model" is the artifact name the model was logged under in that run.
model_uri = f"runs:/{run_id}/model"

result = mlflow.register_model(
    model_uri=model_uri,
    name=model_name
)

print("Registered:", result)


Enter Run ID:  c6661c9e7d1041809e7af289f85569e1


Registered model 'XGB-Smote' already exists. Creating a new version of this model...
2025/12/01 14:13:39 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB-Smote, version 1


Registered: <ModelVersion: aliases=[], creation_timestamp=1764578619501, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1764578619501, metrics=None, model_id=None, name='XGB-Smote', params=None, run_id='c6661c9e7d1041809e7af289f85569e1', run_link='', source='models:/m-eaf5ae56d15040cc85fdc00cb7e1dc4b', status='READY', status_message=None, tags={}, user_id='', version='1'>


Created version '1' of model 'XGB-Smote'.


### Load the Model

In [18]:
# Address the registered model by name and version number. `model_name` is
# whatever the registration cell above set it to.
model_version = 1
model_uri = f"models:/{model_name}/{model_version}"

# Load with the XGBoost flavor and score the held-out test features.
loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(X_test)

# Peek at the first four predictions only.
first_four = y_pred[:4]
first_four

array([0, 0, 0, 0])

### Transition the Model to Production

In [25]:
from mlflow.tracking import MlflowClient
client = MlflowClient()
model_name = "XGB-Smote"

# Fetch every version registered under `model_name` and print a one-line
# summary for each (version number, stage, originating run, source URI).
versions = client.search_model_versions(f"name = '{model_name}'")
for mv in versions:
    fields = (
        "version:", mv.version,
        "stage:", mv.current_stage,
        "run_id:", mv.run_id,
        "source:", mv.source,
    )
    # print() joins its positional arguments with single spaces.
    print(*fields)


version: 1 stage: None run_id: c6661c9e7d1041809e7af289f85569e1 source: models:/m-eaf5ae56d15040cc85fdc00cb7e1dc4b


In [26]:
client = MlflowClient()

# Source: a specific version of the "XGB-Smote" registered model
# (choose one of the versions printed by the listing cell).
model_name = "XGB-Smote"
src_version = "1"
src_uri = f"models:/{model_name}/{src_version}"

# Destination: the registered model that receives the copy.
dst_registered_name = "anomaly-detection-prod"

print("Copying from:", src_uri, "to:", dst_registered_name)

mv = client.copy_model_version(
    src_model_uri=src_uri,
    dst_name=dst_registered_name,
)
print("Copied -> name:", mv.name, "version:", mv.version)


Copying from: models:/XGB-Smote/1 to: anomaly-detection-prod
Copied -> name: anomaly-detection-prod version: 2


Registered model 'anomaly-detection-prod' already exists. Creating a new version of this model...
Copied version '1' of model 'XGB-Smote' to version '2' of model 'anomaly-detection-prod'.


In [32]:
from mlflow.tracking import MlflowClient
client = MlflowClient()

alias_to_find = "challenger"
found = False

# search_registered_models() returns registered model metadata, including
# each model's alias -> version mapping. Reading that mapping avoids one
# extra request per model and, more importantly, avoids a blanket
# `except Exception`, which would report "not found" even when the real
# problem is a connection or permission error.
for rm in client.search_registered_models():
    name = rm.name
    version = rm.aliases.get(alias_to_find)
    if version is None:
        # This model does not carry the alias; move on to the next one.
        continue
    print(f"Alias '{alias_to_find}' found on model: {name} -> version {version}")
    found = True

if not found:
    print(f"Alias '{alias_to_find}' not found on any registered model.")


Alias 'challenger' found on model: anomaly-detection-prod -> version 1


In [33]:
# Resolve the model through its alias rather than a version number, then
# score the test features with the generic pyfunc interface.
challenger_uri = "models:/anomaly-detection-prod@challenger"
model = mlflow.pyfunc.load_model(challenger_uri)
preds = model.predict(X_test)


In [34]:
from mlflow.tracking import MlflowClient
client = MlflowClient()

# Look up which version currently carries the "challenger" alias.
mv = client.get_model_version_by_alias("anomaly-detection-prod", "challenger")   # returns version object

# Move that version to the "Production" stage, archiving whatever was there.
# NOTE(review): model stages appear to be deprecated in recent MLflow
# releases in favor of aliases — confirm before relying on this long term.
# The call is the cell's last expression so the updated ModelVersion is shown.
client.transition_model_version_stage(
    name="anomaly-detection-prod",
    version=mv.version,
    stage="Production",
    archive_existing_versions=True,
)


<ModelVersion: aliases=['challenger'], creation_timestamp=1764579062715, current_stage='Production', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1764581767852, metrics=None, model_id=None, name='anomaly-detection-prod', params=None, run_id='c6661c9e7d1041809e7af289f85569e1', run_link='', source='models:/XGB-Smote/1', status='READY', status_message=None, tags={}, user_id='', version='1'>

In [35]:
# Reload the model through its alias and show the first five predictions
# on the test features.
challenger_uri = "models:/anomaly-detection-prod@challenger"
model = mlflow.pyfunc.load_model(challenger_uri)
preds = model.predict(X_test)
print(preds[:5])


[0 0 0 0 0]
