# Discovering MlflowClient API 

- Create experiment
- List experiments
- Discovering experiment runs
- Manipulating runs info and data
- Model register
- Model versioning
- Testing models as a deployment engineer 
- Modify model aliases

In [3]:
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

# Client bound to the local SQLite-backed MLflow store ("mlflow.db") used throughout this notebook
client = MlflowClient(tracking_uri="sqlite:///mlflow.db")

### Experiments

In [2]:
experiment_tags = {
  "developer": "kamal",
  "data": "nyc-taxi"
}

# Create the experiment only when it does not exist yet: create_experiment raises
# if the name is already taken, which would break re-running this cell.
# The artifact_location parameter, if None, defaults to "mlruns/{experiment_id}".
existing_experiment = client.get_experiment_by_name("nyc-taxi-experiment")
if existing_experiment is None:
  experiment_id = client.create_experiment("nyc-taxi-experiment", tags=experiment_tags)
else:
  experiment_id = existing_experiment.experiment_id

# Display the id of the (new or already existing) experiment
experiment_id

'1'

In [7]:
# Discover the existing experiments and print each one followed by its tags
existing_experiments = client.search_experiments()
print(f"There exist {len(existing_experiments)} experiments")

experiments_by_id = sorted(existing_experiments, key=lambda experiment: experiment.experiment_id)
for experiment in experiments_by_id:
  print(f"{experiment.experiment_id}: {experiment.name}")
  for tag_name, tag_value in experiment.tags.items():
    print(f"\t{tag_name}: {tag_value}")

There exist 2 experiments
0: Default
1: nyc-taxi-experiment
	developer: kamal
	data: nyc-taxi


### Creating some Runs

In [1]:
import xgboost as xgb
from sklearn.metrics import root_mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.feature_extraction import DictVectorizer

from data_loader import read_dataframe

In [2]:
# Parquet files for each split: 2021-01 for training, 2021-03 for validation, 2021-04 for testing.
# Each split is a one-element list so it can be unpacked into read_dataframe(*paths).
training_data, validation_data, test_data = (
  ["../data/green_tripdata_2021-01.parquet"],
  ["../data/green_tripdata_2021-03.parquet"],
  ["../data/green_tripdata_2021-04.parquet"],
)

In [3]:
# Load the training and validation splits with the shared data_loader helper
df_train, df_validation = (
  read_dataframe(*paths) for paths in (training_data, validation_data)
)

# Show (rows, columns) of both frames as the cell output
(df_train.shape, df_validation.shape)

((73908, 3), (80372, 3))

In [4]:
features = ['PU_DO', 'trip_distance']
target = 'duration'


def _feature_records(frame):
  """Return the feature columns of `frame` as a list of per-row dicts."""
  return frame[features].to_dict(orient="records")


# Fit the vectorizer on the training rows, then reuse the fitted mapping for validation
dv = DictVectorizer()
X_train = dv.fit_transform(_feature_records(df_train))
X_validation = dv.transform(_feature_records(df_validation))

y_train, y_validation = df_train[target], df_validation[target]

In [5]:
import mlflow

# Point the fluent mlflow API at the same SQLite store that `client` uses
mlflow.set_tracking_uri("sqlite:///mlflow.db")
# set_experiment creates the experiment when it does not exist yet.
# An experiment with this name was already created in an earlier cell, so MLflow just reuses it.
mlflow.set_experiment(experiment_name="nyc-taxi-experiment")
# mlflow.autolog(disable=True)

2025/06/14 08:28:30 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/06/14 08:28:30 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

<Experiment: artifact_location='/Users/Kamal/WorkSpace/MLOps-zoomcamp/02. Experiment Tracking/mlruns/1', creation_time=1749878911614, experiment_id='1', last_update_time=1749878911614, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [15]:
# Candidate regressors; each one is trained and tracked as its own MLflow run below
sklearn_models = [
  LinearRegression(), 
  Lasso(alpha=0.01, random_state=42),
  DecisionTreeRegressor(max_depth=3, random_state=42),
  RandomForestRegressor(n_estimators=5, max_depth=3, random_state=42)
]

# Collects one {"id": ..., "name": ...} record per MLflow run created by the training loop
runs = []

In [None]:
# Train every candidate model inside its own MLflow run and track its validation RMSE.
for model in sklearn_models:
  # The estimator's class name (e.g. "Lasso") doubles as the run name
  model_name = type(model).__name__
  with mlflow.start_run(run_name=model_name) as current_run:
    trained_model = model.fit(X_train, y_train)
    validation_rmse = root_mean_squared_error(
      y_true=y_validation,
      y_pred=trained_model.predict(X_validation),
    )

    mlflow.log_metric("rmse", validation_rmse)
    # Log the fitted estimator under "model" so the registry cells further down
    # can reference it as runs:/<run_id>/model
    mlflow.sklearn.log_model(trained_model, artifact_path="model")

    # Remember the run so later cells can look it up by id / name
    runs.append({"id": current_run.info.run_id, "name": current_run.info.run_name})



In [22]:
# List the tracked runs as "<run id>: <run name>"
for tracked_run in runs:
  run_id, run_name = tracked_run["id"], tracked_run["name"]
  print(f"{run_id}: {run_name}")

949ee4f1ba8a4115a7bc0b896dc7c61d: LinearRegression
0cd8b12ae8bd4425b0128c7aef2a9a3e: Lasso
4e79b0f6cb3d4ee18a8c1c6ec44de0dd: DecisionTreeRegressor
539c35c128a440d9860ec146d0ffd755: RandomForestRegressor


### Discovering runs

In [27]:
# Fetch every active run of experiment "1", lowest RMSE first.
# experiment_ids is passed as a list, matching the documented List[str] parameter.
retrieved_runs = client.search_runs(
  experiment_ids=["1"],
  filter_string="",
  run_view_type=ViewType.ACTIVE_ONLY,
  order_by=["metrics.rmse ASC"]
)

# Sanity check: the store returns exactly the runs recorded by the training loop.
# The expected ids are collected once into a set instead of being rebuilt per retrieved run.
expected_run_ids = {tracked["id"] for tracked in runs}
retrieved_run_ids = {retrieved.info.run_id for retrieved in retrieved_runs}

assert len(retrieved_runs) == len(runs), (
  f"expected {len(runs)} runs, the tracking store returned {len(retrieved_runs)}"
)
assert retrieved_run_ids <= expected_run_ids, (
  f"unexpected run ids: {sorted(retrieved_run_ids - expected_run_ids)}"
)

In [28]:
# Show each retrieved run with its logged validation RMSE
for retrieved_run in retrieved_runs:
  rmse = retrieved_run.data.metrics['rmse']
  print(f"{retrieved_run.info.run_name}: {rmse}")

RandomForestRegressor: 6.968664183814947
DecisionTreeRegressor: 6.979494582822017
LinearRegression: 7.7207352354521595
Lasso: 11.254830162401234


In [32]:
# Retrieve only the runs that satisfy a metric condition (RMSE below 7), best first
runs_below_threshold = client.search_runs(
  experiment_ids="1",
  filter_string="metrics.rmse < 7",
  order_by=["metrics.rmse ASC"],
)
for matching_run in runs_below_threshold:
  rmse = matching_run.data.metrics['rmse']
  print(f"{matching_run.info.run_name}: {rmse}")

RandomForestRegressor: 6.968664183814947
DecisionTreeRegressor: 6.979494582822017


In [38]:
# Same query through the fluent API; only the run id and RMSE columns are displayed
mlflow.search_runs(
  experiment_ids="1",
  filter_string="metrics.rmse < 7",
  order_by=["metrics.rmse ASC"],
)[["run_id", "metrics.rmse"]]

Unnamed: 0,run_id,metrics.rmse
0,539c35c128a440d9860ec146d0ffd755,6.968664
1,4e79b0f6cb3d4ee18a8c1c6ec44de0dd,6.979495


## Staging models

After exploring the runs and deciding which models are the best, you can register them in the model registry.
That is where the ML engineer or the deployment engineer tests the models and chooses which one to deploy.

data scientist => works up to the staging of the ML models \
deployment engineer => works on the staged models and decides which one to deploy

In [40]:
# Name of the registered model that the registry cells below create, version and alias
REGISTERED_MODEL_NAME = "nyc-taxi-model"

#### Note

There are 2 methods to create a containing registered model:
1. using mlflow.register_model(model_uri, containing_model_name) \
  This method will create the containing model if it does not exist
2. using client.create_registered_model(containing_model_name) \
   then client.create_model_version(containing_model_name, model_uri, version_tags, description) \
   This method gives more control over creating and maintaining the model and its versions

In [None]:
# Method 1: register the model logged by a run; the containing registered model is
# created automatically when it does not exist yet.
# register_model requires both a model URI and a registered-model name (calling it
# without arguments fails). retrieved_runs is ordered by ascending RMSE, so its first
# entry is the best run. A separate "-auto" name is used so that the explicit
# create_registered_model(REGISTERED_MODEL_NAME) call of method 2 does not collide with it.
mlflow.register_model(
  model_uri=f"runs:/{retrieved_runs[0].info.run_id}/model",
  name=REGISTERED_MODEL_NAME + "-auto"
)

In [None]:
# Method 2, step 1: explicitly create the containing registered model (it starts without versions).
# The stray, incomplete `client.al` expression that followed this call was removed:
# it raised AttributeError and hid the RegisteredModel returned here.
client.create_registered_model(name=REGISTERED_MODEL_NAME)

<RegisteredModel: aliases={}, creation_timestamp=1749775356656, description=None, last_updated_timestamp=1749775356656, latest_versions=[], name='automatic_registry_1', tags={}>

In [None]:
from mlflow.exceptions import MlflowException

# Method 2, step 2 - negative example: a model version can only be created inside an
# existing registered model, so the unknown name (REGISTERED_MODEL_NAME + "XX") is rejected.
# The expected exception is caught and printed so that "Run All" can continue past this cell.
try:
  client.create_model_version(
    name=REGISTERED_MODEL_NAME + "XX",
    source="runs:/7e4de4b900ae48a28eb1afb06bf8868c/model",
    tags={"test": "hamada"},
    description="description test"
  )
except MlflowException as error:
  print(f"MlflowException: {error}")


MlflowException: Registered Model with name=automatic_registry_1XX not found

In [8]:
import mlflow

# NOTE(review): repeats the import and tracking-URI setup already done earlier in the
# notebook - presumably kept so the registry cells can be run after a kernel restart;
# confirm, or drop this cell.
mlflow.set_tracking_uri("sqlite:///mlflow.db")

In [25]:
for run in runs:
  # `runs` holds plain dicts ({"id": ..., "name": ...}), so the run id is read by key
  # (run.info.run_id only exists on MLflow Run entities and fails on a dict).
  # Every call adds a new version to the same registered model.
  mlflow.register_model(model_uri=f"runs:/{run['id']}/model", name=REGISTERED_MODEL_NAME)

Registered model 'automatic_registry_1' already exists. Creating a new version of this model...
Created version '1' of model 'automatic_registry_1'.
Registered model 'automatic_registry_1' already exists. Creating a new version of this model...
Created version '2' of model 'automatic_registry_1'.
Registered model 'automatic_registry_1' already exists. Creating a new version of this model...
Created version '3' of model 'automatic_registry_1'.


In [32]:
# Attach the alias "gamd_f45" to version 1 of the registered model
client.set_registered_model_alias(
  name=REGISTERED_MODEL_NAME,
  alias="gamd_f45",
  version=1,
)

In [36]:
# Display every alias currently attached to version 1 of the registered model
client.get_model_version(
  name=REGISTERED_MODEL_NAME,
  version=1,
).aliases

['gamd_f45', 'monster']

In [None]:
# update_model_version needs the registered model name and the version to modify
# (calling it without arguments fails); here it sets the description of version 1.
client.update_model_version(
  name=REGISTERED_MODEL_NAME,
  version="1",
  description="Description set through MlflowClient.update_model_version"
)