# Discovering MlflowClient API 

- Create experiment
- List experiments
- Discovering experiment runs
- Manipulating runs info and data
- Model registration
- Model versioning
- Testing models as a deployment engineer
- Modify model aliases

In [1]:
import boto3

# List the names of the S3 buckets returned for the current AWS credentials.
s3 = boto3.client("s3")
buckets = s3.list_buckets()
bucket_names = [bucket["Name"] for bucket in buckets["Buckets"]]
print(bucket_names)

['mlflow-store-rem']


In [1]:
import os

# Select the AWS named profile through the AWS_PROFILE environment variable.
os.environ.update(AWS_PROFILE="default")

In [2]:
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

# Host and port of the remote MLflow tracking server, combined into one URI.
TRACKING_SERVER_HOST = "ec2-16-170-240-86.eu-north-1.compute.amazonaws.com"
MLFLOW_PORT = 5000
TRACKING_URI = f"http://{TRACKING_SERVER_HOST}:{MLFLOW_PORT}"

# Low-level client used for the experiment, run and registry calls below.
client = MlflowClient(tracking_uri=TRACKING_URI)

### Experiments

In [3]:
# List the experiments on the tracking server (active ones only, the default view).
client.search_experiments(view_type=ViewType.ACTIVE_ONLY)

[<Experiment: artifact_location='s3://mlflow-store-rem/17', creation_time=1750563684003, experiment_id='17', last_update_time=1750563684003, lifecycle_stage='active', name='nyc-taxi-experiment-aws13', tags={'data': 'nyc-taxi', 'developer': 'kamal'}>,
 <Experiment: artifact_location='s3://mlflow-store-rem/0', creation_time=1750495005398, experiment_id='0', last_update_time=1750495005398, lifecycle_stage='active', name='Default', tags={}>]

In [22]:
# Name of the experiment that is created and then selected for the runs in this notebook.
EXPERIMENT_NAME = "nyc-taxi-experiment-aws-2"

In [23]:
experiment_tags = {
  "developer": "kamal",
  "data": "nyc-taxi",
}

# Create the experiment only if it does not exist yet, so this cell can be re-run:
# create_experiment fails when an experiment with the same name already exists.
# The artifact_location parameter is left unset, so the tracking server's default
# artifact root is used (an S3 path for the experiments listed above).
existing_experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
if existing_experiment is None:
  experiment_id = client.create_experiment(EXPERIMENT_NAME, tags=experiment_tags)
else:
  experiment_id = existing_experiment.experiment_id

In [24]:
# discover the existing experiments
existing_experiments = client.search_experiments()

print(f"There exist {len(existing_experiments)} experiments")
# Experiment IDs are strings, so a plain sort is lexicographic ("10" sorts before "2").
# Ordering by (length, text) gives numeric order for digit-only IDs and never raises.
for exp in sorted(existing_experiments, key=lambda exp: (len(exp.experiment_id), exp.experiment_id)):
  print(f"{exp.experiment_id}: {exp.name}")
  for key, value in exp.tags.items():
    print(f"\t{key}: {value}")

There exist 4 experiments
0: Default
17: nyc-taxi-experiment-aws13
	developer: kamal
	data: nyc-taxi
18: nyc-taxi-experiment-aws-1
	developer: kamal
	data: nyc-taxi
19: nyc-taxi-experiment-aws-2
	developer: kamal
	data: nyc-taxi


### Creating some Runs

In [7]:
import xgboost as xgb
from sklearn.metrics import root_mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.feature_extraction import DictVectorizer

from data_loader import read_dataframe

In [8]:
# green_tripdata parquet files, one single-element path list per split:
# 2021-01 for training, 2021-03 for validation and 2021-02 for testing.
training_data = ["../data/green_tripdata_2021-01.parquet"]
validation_data = ["../data/green_tripdata_2021-03.parquet"]
test_data = ["../data/green_tripdata_2021-02.parquet"]

In [9]:
# Load the training and validation frames. Each path list holds a single file,
# so unpacking it hands that one path to read_dataframe.
df_train, df_validation = (
  read_dataframe(*paths) for paths in (training_data, validation_data)
)

df_train.shape, df_validation.shape

((73908, 3), (80372, 3))

In [10]:
features = ['PU_DO', 'trip_distance']
target = 'duration'

# Targets are the plain target column of each frame.
y_train, y_validation = df_train[target], df_validation[target]

# Turn the feature columns into lists of per-row dicts for the vectorizer.
train_records = df_train[features].to_dict(orient="records")
validation_records = df_validation[features].to_dict(orient="records")

# Fit the vectorizer on the training records only, then reuse it for validation.
dv = DictVectorizer()
X_train = dv.fit_transform(train_records)
X_validation = dv.transform(validation_records)

In [26]:
import mlflow
import mlflow.sklearn

# Point the fluent mlflow API at the same tracking server as `client`.
mlflow.set_tracking_uri(TRACKING_URI)
# set_experiment would create the experiment if it were missing; it was already
# created under this name in an earlier step, so MLflow simply selects it.
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

# Turn on scikit-learn autologging for the model fits that follow.
mlflow.sklearn.autolog()

In [27]:
# Display the tracking URI the fluent mlflow API is currently configured with.
mlflow.get_tracking_uri()

'http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000'

In [28]:
# Estimators to train, each inside its own MLflow run in the loop below.
# random_state is pinned to 42 for Lasso, the decision tree and the random forest.
sklearn_models = [
  LinearRegression(), 
  Lasso(alpha=0.01, random_state=42),
  DecisionTreeRegressor(max_depth=3, random_state=42),
  RandomForestRegressor(n_estimators=5, max_depth=3, random_state=42)
]

# One {"id": ..., "name": ...} record per MLflow run started by the training loop.
# The container is a list of dicts, not a dict, so it is annotated accordingly.
runs: list[dict[str, str]] = []

In [29]:
from tqdm import tqdm

for model in tqdm(sklearn_models):
  # Name the run after the estimator class (e.g. "Lasso") instead of slicing its repr.
  model_name = type(model).__name__
  with mlflow.start_run(run_name=model_name) as current_run:
    # The fitted estimator is not needed afterwards, so the return value is not kept.
    model.fit(X_train, y_train)
    # Remember the run so it can be cross-checked against the tracking server later.
    runs.append({"id": current_run.info.run_id, "name": current_run.info.run_name})

 25%|██▌       | 1/4 [00:33<01:41, 33.92s/it]

🏃 View run LinearRegression at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19/runs/d21d0ae1d55d45cc935cdae695cc9269
🧪 View experiment at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19


 50%|█████     | 2/4 [01:08<01:08, 34.10s/it]

🏃 View run Lasso at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19/runs/ac1a5ce4dbf245da80b28d3f8718f06e
🧪 View experiment at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19




🏃 View run DecisionTreeRegressor at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19/runs/137dcf565ee14464b4f4de79c0a6697a
🧪 View experiment at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19




🏃 View run RandomForestRegressor at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19/runs/65233a1ad1f2448ea5cdfdf1bb6cf118
🧪 View experiment at: http://ec2-16-170-240-86.eu-north-1.compute.amazonaws.com:5000/#/experiments/19


100%|██████████| 4/4 [02:15<00:00, 33.92s/it]


In [30]:
# One line per recorded run, formatted as "<run id>: <run name>".
for run in runs:
  print(run["id"], run["name"], sep=": ")

d21d0ae1d55d45cc935cdae695cc9269: LinearRegression
ac1a5ce4dbf245da80b28d3f8718f06e: Lasso
137dcf565ee14464b4f4de79c0a6697a: DecisionTreeRegressor
65233a1ad1f2448ea5cdfdf1bb6cf118: RandomForestRegressor


### Discovering runs

In [31]:
# Fetch every active run of the experiment, lowest training RMSE first.
retrieved_runs = client.search_runs(
  experiment_ids=[experiment_id],
  filter_string="",
  run_view_type=ViewType.ACTIVE_ONLY,
  order_by=["metrics.training_root_mean_squared_error ASC"]
)

# Cross-check the server's view against the runs recorded locally during training.
# The ID set is built once, rather than rebuilding a list for every retrieved run.
logged_run_ids = {logged_run["id"] for logged_run in runs}

assert len(retrieved_runs) == len(runs), (
  f"expected {len(runs)} runs, tracking server returned {len(retrieved_runs)}"
)
for run in retrieved_runs:
  assert run.info.run_id in logged_run_ids, f"unexpected run {run.info.run_id}"

In [37]:
# Training RMSE of each retrieved run, in the order the search returned them.
for run in retrieved_runs:
  rmse = run.data.metrics['training_root_mean_squared_error']
  print(f"{run.info.run_name}: {rmse}")

LinearRegression: 5.6995641181989996
RandomForestRegressor: 6.293870290580047
DecisionTreeRegressor: 6.315541694545283
Lasso: 10.550479041655857


In [38]:
# Only the runs whose training RMSE is below 7, lowest RMSE first.
low_rmse_runs = client.search_runs(
  experiment_ids=experiment_id,
  filter_string="metrics.training_root_mean_squared_error < 7",
  order_by=["metrics.training_root_mean_squared_error ASC"],
)
for run in low_rmse_runs:
  rmse = run.data.metrics['training_root_mean_squared_error']
  print(f"{run.info.run_name}: {rmse}")

LinearRegression: 5.6995641181989996
RandomForestRegressor: 6.293870290580047
DecisionTreeRegressor: 6.315541694545283


In [39]:
# Same filtered query through the fluent API; the result is indexed by column
# names to keep only the run ID, artifact URI and training RMSE.
selected_columns = ["run_id", "artifact_uri", "metrics.training_root_mean_squared_error"]
mlflow.search_runs(
  experiment_ids=experiment_id,
  filter_string="metrics.training_root_mean_squared_error < 7",
  order_by=["metrics.training_root_mean_squared_error ASC"],
)[selected_columns]

Unnamed: 0,run_id,artifact_uri,metrics.training_root_mean_squared_error
0,d21d0ae1d55d45cc935cdae695cc9269,s3://mlflow-store-rem/19/d21d0ae1d55d45cc935cd...,5.699564
1,65233a1ad1f2448ea5cdfdf1bb6cf118,s3://mlflow-store-rem/19/65233a1ad1f2448ea5cdf...,6.29387
2,137dcf565ee14464b4f4de79c0a6697a,s3://mlflow-store-rem/19/137dcf565ee14464b4f4d...,6.315542


### Staging models

After exploring the runs and deciding on the best models, you can register some of them in the model registry.
That is where the ML engineer or deployment engineer tests the models and chooses which one to deploy.

data scientist => works up to staging the ML models \
deployment engineer => works on the staged models and decides which to deploy

In [40]:
# Name of the registered model that the model versions below are created under.
REGISTERED_MODEL_NAME = "nyc-taxi-model"

#### Note

There are 2 methods to create a containing registered model:
1. using mlflow.register_model(model_uri, containing_model_name) \
  This method will create the containing model if it does not exist
2. using client.create_registered_model(containing_model_name) \
   then client.create_model_version(containing_model_name, model_uri, version_tags, description) \
   This method gives more control over creating and maintaining the model and its versions

In [32]:
# Method 1: register a run's logged model; this creates the registered model if needed.
# NOTE(review): the run ID is hard-coded and is not among the run IDs printed in the
# "Creating some Runs" section, and this cell's execution count is out of sequence --
# it looks like a leftover from another session. It also registers the same model name
# that the next cell passes to create_registered_model, which presumably conflicts when
# both cells run against one registry. Confirm before re-running.
mlflow.register_model(model_uri="runs:/85aae2e0d958479ba524144afc5fc0b3/model", name=REGISTERED_MODEL_NAME)

2025/06/14 14:22:10 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/06/14 14:22:10 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Successfully registered model 'nyc-taxi-model'.
Created version '1' of model 'nyc-taxi-model'.


<ModelVersion: aliases=[], creation_timestamp=1749900130753, current_stage='None', deployment_job_state=None, description=None, last_updated_timestamp=1749900130753, metrics=None, model_id=None, name='nyc-taxi-model', params=None, run_id='85aae2e0d958479ba524144afc5fc0b3', run_link=None, source='models:/m-71c170708850403d9e5fd0df7cb709b5', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [41]:
from datetime import date

# Method 2, step 1: create the (still empty) registered model with tags and a description.
registry_tags = {"creator": "kamal", "problem": "nyc-taxi"}
client.create_registered_model(
  name=REGISTERED_MODEL_NAME,
  tags=registry_tags,
  description=f"created at {date.today()}",
)

<RegisteredModel: aliases={}, creation_timestamp=1750565152451, deployment_job_id='', deployment_job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', description='created at 2025-06-22', last_updated_timestamp=1750565152451, latest_versions=[], name='nyc-taxi-model', tags={'creator': 'kamal', 'problem': 'nyc-taxi'}>

In [42]:
# Method 2, step 2: add one model version per retrieved run.
for run in retrieved_runs:
  client.create_model_version(
    name=REGISTERED_MODEL_NAME,
    source=f"runs:/{run.info.run_id}/model",
    # Record the originating run on the version so the registry entry links back to it.
    run_id=run.info.run_id,
    tags={"name": f"{run.info.run_name}"},
    description=f"Moved to registry on {date.today()}"
  )

# List every version of the registered model together with its download URI.
for version in client.search_model_versions(filter_string=f"name = '{REGISTERED_MODEL_NAME}'"):
  download_uri = client.get_model_version_download_uri(name=REGISTERED_MODEL_NAME, version=version.version)
  print(f"version:{version.version}, URI: {download_uri}")

2025/06/22 07:06:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: nyc-taxi-model, version 1
2025/06/22 07:06:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: nyc-taxi-model, version 2
2025/06/22 07:06:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: nyc-taxi-model, version 3
2025/06/22 07:06:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: nyc-taxi-model, version 4


version:4, URI: runs:/ac1a5ce4dbf245da80b28d3f8718f06e/model
version:3, URI: runs:/137dcf565ee14464b4f4de79c0a6697a/model
version:2, URI: runs:/65233a1ad1f2448ea5cdfdf1bb6cf118/model
version:1, URI: runs:/d21d0ae1d55d45cc935cdae695cc9269/model


In [43]:
# Attach the "waiting-release" alias to version 4 of the registered model.
client.set_registered_model_alias(
  name=REGISTERED_MODEL_NAME,
  alias="waiting-release",
  version="4",
)

### ML engineer testing the models to promote good ones

In [44]:
# test_data is a one-element list of paths; unpack it so read_dataframe receives the
# path itself, the same way the training and validation frames were loaded.
df_test = read_dataframe(*test_data)

# Reuse the vectorizer fitted on the training data; it is only applied here, not refitted.
X_test = dv.transform(df_test[features].to_dict(orient="records"))
y_test = df_test[target]

In [45]:
# Show where version 1 of the registered model is downloaded from.
client.get_model_version_download_uri(name=REGISTERED_MODEL_NAME, version="1")

'runs:/d21d0ae1d55d45cc935cdae695cc9269/model'

In [46]:
import mlflow.sklearn

# Score every registered version on the test split: version number -> test RMSE.
testing_results = {}
registered_versions = client.search_model_versions(filter_string=f"name = '{REGISTERED_MODEL_NAME}'")
for version in registered_versions:
  # Resolve the version to its download URI and load it as a scikit-learn model.
  model_uri = client.get_model_version_download_uri(REGISTERED_MODEL_NAME, version.version)
  model = mlflow.sklearn.load_model(model_uri)

  y_test_pred = model.predict(X_test)
  testing_results[version.version] = root_mean_squared_error(y_test, y_test_pred)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [47]:
# Rank the versions by test RMSE, lowest first.
ranked_results = sorted(testing_results.items(), key=lambda item: item[1])
for version_number, rmse in ranked_results:
  print(f"version: {version_number}: {rmse:.04f}")

version: 2: 7.0707
version: 3: 7.0890
version: 1: 7.7587
version: 4: 11.1673


In [48]:
# Version 2 has the lowest test RMSE in the results above.
# List the aliases currently set on the registered model, as "<version>: <alias>".
registered_model = client.get_registered_model(name=REGISTERED_MODEL_NAME)
for alias, model_version in registered_model.aliases.items():
  print(f"{model_version}: {alias}")

4: waiting-release
