# Different scenarios for using MLflow

__Scenario 1.__ A single data scientist participating in an ML competition

__Scenario 2.__ A cross-functional team working with a single data scientist on an ML model

__Scenario 3.__ Multiple data scientists working on multiple ML models



## Scenario 1: A single data scientist participating in an ML competition

The data scientist does not need to share her runs with others:

- Tracking locally is enough; a remote tracking server is not needed.
- The model registry is not needed, because the data scientist is not interested in deploying the model to production.

MLflow setup:

- Tracking server: no
- Backend store: local filesystem
- Artifacts store: local filesystem

The experiments can be explored locally by launching the MLflow UI.


In [1]:
import mlflow

In [None]:
# No tracking URI has been set in this notebook so far. In that case MLflow is
# expected to fall back to the local file system, i.e. the 'mlruns' folder,
# for both artifacts and experiment metadata.
# Each run of an experiment is stored under: mlruns/<exp_id>/<run_id>
# NOTE(review): the recorded output shows a sqlite URI, which suggests a
# tracking URI was configured in that session — confirm before relying on
# the file-system default.
current_tracking_uri = mlflow.get_tracking_uri()
print(f"tracking URI: '{current_tracking_uri}'")

tracking URI: 'sqlite:///mlflow.db'


<Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment tracking/mlruns/1', creation_time=1747168400782, experiment_id='1', last_update_time=1747168400782, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

##### Creating an experiment and logging a new run

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Scenario 1 is meant to log to the local file system rather than to sqlite.
# This cell does not set a tracking URI itself; it uses whichever one is active.
mlflow.set_experiment("scenario-1")

with mlflow.start_run():

    # Toy dataset: train and evaluate on the same data, just to have
    # something to log.
    X, y = load_iris(return_X_y=True)

    # Hyper-parameters are logged before training.
    params = dict(C=0.1, random_state=42)
    mlflow.log_params(params)

    lr = LogisticRegression(**params)
    lr.fit(X, y)

    y_pred = lr.predict(X)
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Store the fitted model under the run's "models" artifact path and show
    # where the artifacts of this run are stored.
    mlflow.sklearn.log_model(lr, artifact_path="models")
    artifact_uri = mlflow.get_artifact_uri()
    print(f"default artifacts URI: '{artifact_uri}'")

MissingConfigException: Yaml file '/workspaces/mlops-zoomcamp/02-experiment tracking/mlruns/1/meta.yaml' does not exist.

In [None]:
# mlflow.show_experiments()

##### Interacting with the model registry

In [None]:
from mlflow.exceptions import MlflowException
from mlflow.tracking import MlflowClient

# No tracking URI is passed, so the client uses the currently configured one.
client = MlflowClient()

# Probe the model registry. `list_registered_models()` was removed in MLflow 2.x
# (calling it raises AttributeError, which the handler below would not catch);
# `search_registered_models()` is its replacement and is the call used in the
# Scenario 2 section of this notebook.
try:
    client.search_registered_models()
except MlflowException:
    # Raised when the active store cannot serve model-registry requests.
    print("It's not possible to access the model registry :(")

## Scenario 2: A cross-functional team with one data scientist working on an ML model

MLflow setup:

- tracking server: yes, local server
- backend store: sqlite database
- artifacts store: local filesystem

The experiments can be explored locally by accessing the local tracking server.

To run this example you need to launch the mlflow server locally by running the following command in your terminal:

`mlflow server --backend-store-uri sqlite:///backend.db`

In [19]:
import mlflow

# Address of the local MLflow server started with the `mlflow server` command
# described above.
TRACKING_SERVER_URI = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(TRACKING_SERVER_URI)

# Read the value back from MLflow to confirm which URI is active.
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'http://127.0.0.1:5000'


In [20]:
# List the experiments visible through the current tracking URI.
mlflow.search_experiments()

[<Experiment: artifact_location='mlflow-artifacts:/3', creation_time=1747376168695, experiment_id='3', last_update_time=1747376168695, lifecycle_stage='active', name='scenario-2', tags={}>,
 <Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment tracking/mlruns/1', creation_time=1747168400782, experiment_id='1', last_update_time=1747168400782, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment tracking/mlruns/0', creation_time=1747168400778, experiment_id='0', last_update_time=1747168400778, lifecycle_stage='active', name='Default', tags={}>]

In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Runs from this cell are grouped under the "scenario-2" experiment on the
# tracking server configured earlier in this section.
mlflow.set_experiment("scenario-2")

with mlflow.start_run():

    # Toy dataset: train and evaluate on the same data, just to have
    # something to log.
    X, y = load_iris(return_X_y=True)

    # Hyper-parameters are logged before training.
    params = dict(C=0.1, random_state=42)
    mlflow.log_params(params)

    lr = LogisticRegression(**params)
    lr.fit(X, y)

    y_pred = lr.predict(X)
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Store the fitted model under the run's "models" artifact path and show
    # where the artifacts of this run are stored.
    mlflow.sklearn.log_model(lr, artifact_path="models")
    artifact_uri = mlflow.get_artifact_uri()
    print(f"default artifacts URI: '{artifact_uri}'")



default artifacts URI: 'mlflow-artifacts:/3/ad2298b4b2ae4be6af486bb34f7fca27/artifacts'
🏃 View run bedecked-mare-138 at: http://127.0.0.1:5000/#/experiments/3/runs/ad2298b4b2ae4be6af486bb34f7fca27
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3


In [22]:
# List the experiments again now that a run has been logged to "scenario-2".
mlflow.search_experiments()

[<Experiment: artifact_location='mlflow-artifacts:/3', creation_time=1747376168695, experiment_id='3', last_update_time=1747376168695, lifecycle_stage='active', name='scenario-2', tags={}>,
 <Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment tracking/mlruns/1', creation_time=1747168400782, experiment_id='1', last_update_time=1747168400782, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment tracking/mlruns/0', creation_time=1747168400778, experiment_id='0', last_update_time=1747168400778, lifecycle_stage='active', name='Default', tags={}>]

##### Interacting with the model registry

In [23]:
from mlflow.tracking import MlflowClient


# Client bound explicitly to the local tracking server used in this scenario.
client = MlflowClient(tracking_uri="http://127.0.0.1:5000")

In [24]:
# Query the model registry through the client; the cell output shows the
# registered models it returns.
client.search_registered_models()

[<RegisteredModel: aliases={}, creation_timestamp=1747334085049, description='', last_updated_timestamp=1747342848420, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1747334085074, current_stage='Production', description=None, last_updated_timestamp=1747342848420, name='nyc-taxi-regressor', run_id='65a7096112cb41b693e557e8008cfa9c', run_link='', source=('/workspaces/mlops-zoomcamp/02-experiment '
  'tracking/mlruns/1/65a7096112cb41b693e557e8008cfa9c/artifacts/model'), status='READY', status_message=None, tags={'model': 'xgboost'}, user_id='', version='1'>,
  <ModelVersion: aliases=[], creation_timestamp=1747334183102, current_stage='Archived', description='The model version 2 was transitioned to Production on 2025-05-15', last_updated_timestamp=1747342848420, name='nyc-taxi-regressor', run_id='4aa739211db94627b1744e36f4ee9664', run_link='', source=('/workspaces/mlops-zoomcamp/02-experiment '
  'tracking/mlruns/1/4aa739211db94627b1744e36f4ee9664/artifacts/model'), status

In [45]:
# Find the most recent runs of the "scenario-2" experiment.
# The experiment is resolved by name instead of a hard-coded numeric ID:
# IDs are assigned by the backend store and differ between databases, so a
# literal such as "3" only works against one particular backend.
experiment = client.get_experiment_by_name("scenario-2")
if experiment is None:
    raise RuntimeError(
        "Experiment 'scenario-2' was not found on the tracking server; "
        "run the training cell of this scenario first."
    )

runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],  # Can be a list of one or more experiment IDs
    filter_string="",                           # Optional: filter by metrics, params, etc.
    order_by=["attributes.start_time DESC"],    # Sort by most recent
    max_results=5                               # Limit number of runs returned
)

# Print run IDs
for run in runs:
    print(f"Run ID: {run.info.run_id}")

Run ID: ad2298b4b2ae4be6af486bb34f7fca27


In [46]:
# Register the model logged by the most recent run found by the `search_runs`
# cell above. `runs` is ordered by start time (newest first), so the first
# entry is the latest run. Taking the ID from the query result instead of
# pasting a literal keeps the notebook re-runnable against a fresh server.
if not runs:
    raise RuntimeError(
        "No runs were found for the experiment; log a run before registering a model."
    )

run_id = runs[0].info.run_id

# "models" is the artifact path the model was logged under in the training cell.
mlflow.register_model(
    model_uri=f"runs:/{run_id}/models",
    name='iris-classifier'
)

Successfully registered model 'iris-classifier'.
2025/05/16 06:27:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-classifier, version 1
Created version '1' of model 'iris-classifier'.


<ModelVersion: aliases=[], creation_timestamp=1747376826511, current_stage='None', description='', last_updated_timestamp=1747376826511, name='iris-classifier', run_id='ad2298b4b2ae4be6af486bb34f7fca27', run_link='', source='mlflow-artifacts:/3/ad2298b4b2ae4be6af486bb34f7fca27/artifacts/models', status='READY', status_message=None, tags={}, user_id='', version='1'>

## Scenario 3: Multiple data scientists working on multiple ML models

MLflow setup:

- Tracking server: yes, remote server (EC2).
- Backend store: postgresql database.
- Artifacts store: s3 bucket.

The experiments can be explored by accessing the remote server.

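The example uses AWS to host a remote server. In order to run the example you'll need an AWS account. Follow the steps described in the file `mlflow_on_aws.md` to create a new AWS account and launch the tracking server.

**Note:** the code cells below connect to an Azure ML workspace (via `azureml.core`) and use it as the remote tracking server, rather than the AWS setup described above. If you follow the AWS instructions, replace the Azure authentication/workspace cells with a call to `mlflow.set_tracking_uri(...)` that points at your own tracking server.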

In [None]:
import mlflow
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from mlflow.tracking import MlflowClient


In [None]:
# 1. Authenticate to Azure
interactive_auth = InteractiveLoginAuthentication()

# 2. Connect to your Azure ML Workspace
#    (the values below are placeholders — replace them with your own)
workspace_settings = {
    "subscription_id": "your-subscription-id",
    "resource_group": "your-resource-group",
    "workspace_name": "your-workspace-name",
}
ws = Workspace(auth=interactive_auth, **workspace_settings)

# 3. Set the MLflow tracking URI to the one exposed by the Azure ML workspace
azure_tracking_uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(azure_tracking_uri)
print(f"Tracking URI: {mlflow.get_tracking_uri()}")


In [None]:
# 4. Set experiment
mlflow.set_experiment("my-experiment-1")

# 5. Train and log a model
with mlflow.start_run() as run:
    # Toy dataset: train and evaluate on the same data, just to have
    # something to log.
    X, y = load_iris(return_X_y=True)

    # Hyper-parameters are logged before training.
    params = dict(C=0.1, random_state=42)
    mlflow.log_params(params)

    lr = LogisticRegression(**params)
    lr.fit(X, y)

    y_pred = lr.predict(X)
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Log model to MLflow under the "models" artifact path
    mlflow.sklearn.log_model(lr, artifact_path="models")
    artifact_uri = mlflow.get_artifact_uri()
    print(f"Artifacts URI: {artifact_uri}")

    # Keep the run ID: the registration cell below needs it.
    run_id = run.info.run_id

In [None]:
# 6. Register the model in Azure ML Model Registry
# The URI points at the "models" artifact path of the run logged above.
logged_model_uri = f"runs:/{run_id}/models"
result = mlflow.register_model(model_uri=logged_model_uri, name="iris-classifier")

In [None]:
# 7. (Optional) Transition the model to "Production"
# NOTE(review): stage transitions may be deprecated in recent MLflow releases
# in favour of model version aliases — confirm against the installed version.
client = MlflowClient()

new_version = result.version
client.transition_model_version_stage(
    name="iris-classifier",
    version=new_version,
    stage="Production",
    # Passed as True so that versions already in this stage are archived.
    archive_existing_versions=True,
)

print(f"Model 'iris-classifier' version {result.version} is now in Production.")