# Using the MLflow API in a Jupyter Notebook

In [39]:
from mlflow.tracking import MlflowClient

# Tracking backend used throughout this notebook: a SQLite database file
# named `mlflow.db`, given as a relative path.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

# Client object used by the cells below to query and modify experiments,
# runs and registered models.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [None]:
# Query the tracking backend for its experiments; as the last expression of
# the cell, the result is displayed.
client.search_experiments()

## Create a new experiment (it then shows up in the MLflow UI)

In [5]:
# `create_experiment` fails when an experiment with this name already exists,
# so look the name up first. This keeps the cell safe to re-run
# (e.g. under "Restart Kernel -> Run All").
experiment_name = "my-cool-experiment"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    experiment_id = client.create_experiment(name=experiment_name)
else:
    experiment_id = existing_experiment.experiment_id

# Last expression of the cell: display the experiment id, as the original
# `create_experiment` call did.
experiment_id

'2'

## Finding the best runs of an experiment

In [6]:
from mlflow.entities import ViewType

In [11]:
# Fetch at most five active runs of experiment "1" whose `rmse` metric is
# below 6.8, sorted so that the lowest RMSE comes first.
runs = client.search_runs(
    experiment_ids="1",  # experiment to search in
    filter_string="metrics.rmse < 6.8", # keep only matching runs (an empty string disables filtering)
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

In [12]:
# Print one line per run returned above: its id and its `rmse` metric
# formatted to four decimal places.
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: c96613bfe29f4522a8a46179f04e2681, rmse: 6.3032
run id: 8a204fd78a4248869787181306cda116, rmse: 6.3032
run id: a8beaa704725443db702c63ffc026972, rmse: 6.3032
run id: cdc3ef7c7d044b17b51d3dcd4bed5f9b, rmse: 6.3032
run id: 6c4492d3bb52417da4478cb0ff607b02, rmse: 6.3032


## Promote Models to the Model Registry

In [13]:
import mlflow
# Point the module-level `mlflow` API at the same backend as `client`, so
# that `mlflow.register_model` below uses the same database.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [None]:
# NOTE(review): the run id is hard-coded; it matches the first run printed by
# the search above, but it only exists in this particular tracking database.
run_id = "c96613bfe29f4522a8a46179f04e2681"
# URI of the artifact path `model` inside that run.
model_uri = f"runs:/{run_id}/model"
# Register the run's model under the name "nyc-taxi-regressor".
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

In [16]:
# Show the URI that was passed to `mlflow.register_model`.
print(model_uri)

runs:/c96613bfe29f4522a8a46179f04e2681/model


The model registry should now contain a 3rd version of the model.

## Transitioning a model from one stage to another

In [18]:
# List the registered models; the displayed repr includes each model's
# `latest_versions` with their current stage.
client.search_registered_models()

[<RegisteredModel: aliases={}, creation_timestamp=1686307016827, description='', last_updated_timestamp=1686308924679, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1686307249108, current_stage='Staging', description='', last_updated_timestamp=1686307689630, name='nyc-taxi-regressor', run_id='8a204fd78a4248869787181306cda116', run_link='', source='/media/userl/Ubuntu-DataStora/Learning/MLOps_Zoomcamp/mlops-material/2-Experiment-Tracking/mlruns/1/8a204fd78a4248869787181306cda116/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=2>,
  <ModelVersion: aliases=[], creation_timestamp=1686308924679, current_stage='None', description=None, last_updated_timestamp=1686308924679, name='nyc-taxi-regressor', run_id='c96613bfe29f4522a8a46179f04e2681', run_link=None, source='/media/userl/Ubuntu-DataStora/Learning/MLOps_Zoomcamp/mlops-material/2-Experiment-Tracking/mlruns/1/c96613bfe29f4522a8a46179f04e2681/artifacts/model', status='READY', s

Get the latest version of a particular model in each stage:

In [22]:
# Name of the registered model; reused by the cells below.
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)

# Print the version number and current stage of each returned model version.
for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 2, stage: Staging
version: 3, stage: None


In [None]:
# Version to move and the stage to move it to; both are reused by the
# description update further down.
model_version = 3
new_stage = "Staging"

# Move the chosen version into `new_stage`. With
# `archive_existing_versions=False`, versions already in that stage are not
# archived.
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False
)

Now look at the UI: the 3rd model version should be in the `Staging` stage.

### Updating model version (changing information)

In [None]:
from datetime import datetime

# Local wall-clock time (no timezone attached), embedded in the description.
date = datetime.today()

# Record on the model version when and where it was transitioned. This text
# is stored in the registry, so the spelling of "version" is fixed here.
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}"
)

Check the model registry again to see the updated description.

### Example Code

In [54]:
from sklearn.metrics import mean_squared_error as MSE
import pandas as pd

def read_dataframe(filename):
    """Load a taxi-trip parquet file and derive the trip duration in minutes.

    The pickup/drop-off timestamp columns are detected from the columns that
    are actually present in the file (``lpep_*`` or ``tpep_*``) instead of
    being guessed from a year substring in ``filename``. A file whose name
    contains "2022" but whose columns are ``lpep_*`` therefore loads
    correctly, and a "2021" appearing elsewhere in the path no longer selects
    the wrong columns.

    Args:
        filename: Path handed to ``pandas.read_parquet``.

    Returns:
        A new DataFrame restricted to trips whose ``duration`` lies between
        1 and 60 minutes (inclusive), with ``PULocationID`` and
        ``DOLocationID`` converted to strings.

    Raises:
        ValueError: If the file has neither the ``lpep_*`` nor the ``tpep_*``
            pickup/drop-off timestamp columns.
    """
    df = pd.read_parquet(filename)

    # Pick the first timestamp-column pair that exists in the data.
    for prefix in ("lpep", "tpep"):
        pickup_datetime = f"{prefix}_pickup_datetime"
        dropoff_datetime = f"{prefix}_dropoff_datetime"
        if pickup_datetime in df.columns and dropoff_datetime in df.columns:
            break
    else:
        raise ValueError("Invalid dataset")

    df[dropoff_datetime] = pd.to_datetime(df[dropoff_datetime])
    df[pickup_datetime] = pd.to_datetime(df[pickup_datetime])

    # Trip duration in minutes.
    elapsed = df[dropoff_datetime] - df[pickup_datetime]
    df["duration"] = elapsed.dt.total_seconds() / 60

    # Remove outliers; `.copy()` so the column assignment below does not
    # write into a view of the unfiltered frame.
    df = df[(df["duration"] >= 1) & (df["duration"] <= 60)].copy()

    # Convert the location ids to strings so they are treated as categorical
    # (one-hot encoded) features downstream.
    categorical = ["PULocationID", "DOLocationID"]
    df[categorical] = df[categorical].astype(str)

    return df

def preprocess(df, dv):
    """Turn a trip DataFrame into the feature matrix produced by ``dv``.

    Adds a combined pickup/drop-off column ``PU_DO`` to ``df`` (the frame is
    modified in place), converts the ``PU_DO`` and ``trip_distance`` columns
    to a list of per-row dicts and passes them to ``dv.transform``.

    Args:
        df: DataFrame with ``PULocationID``, ``DOLocationID`` and
            ``trip_distance`` columns.
        dv: Object exposing a ``transform`` method that accepts a list of
            record dicts.

    Returns:
        Whatever ``dv.transform`` returns for the records.
    """
    pickup_ids = df["PULocationID"]
    dropoff_ids = df["DOLocationID"]
    df["PU_DO"] = pickup_ids + "_" + dropoff_ids

    # Categorical feature first, numerical second - same column order as the
    # records were built with originally.
    feature_columns = ["PU_DO", "trip_distance"]
    records = df[feature_columns].to_dict(orient="records")
    return dv.transform(records)

def test_model(name, stage, X_test, y_test):
    """Load a registered model by name and stage and report its RMSE.

    Args:
        name: Name of the registered model.
        stage: Registry stage to load the model from; it is interpolated into
            the URI ``models:/{name}/{stage}``.
        X_test: Features passed to the loaded model's ``predict``.
        y_test: True target values to compare the predictions against.

    Returns:
        A dict with the single key ``"rmse"``.
    """
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    # Take the square root explicitly instead of passing `squared=False`:
    # that keyword was deprecated in scikit-learn 1.4 and is scheduled for
    # removal in 1.6, so the explicit root works across versions. For a
    # single (1-D) target, as used in this notebook, the value is the same.
    return {"rmse": MSE(y_test, y_pred) ** 0.5}

In [50]:
# Load the 2021-01 green-taxi trip file with `read_dataframe`; the path is
# relative to the current working directory.
df = read_dataframe("data/green_tripdata_2021-01.parquet")

In [None]:
# Download the `preprocessor` artifact path of run `run_id` into the current
# working directory; the next cell reads `preprocessor/preprocessor.b` from it.
client.download_artifacts(run_id=run_id, path="preprocessor", dst_path=".")

In [31]:
import pickle
# Load the downloaded preprocessor object; it is passed to `preprocess` as
# `dv` below.
# NOTE(review): `pickle.load` can execute arbitrary code - only load
# artifacts that come from runs you trust.
with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

In [32]:
# Build the feature matrix for evaluation. Note that `preprocess` also adds a
# `PU_DO` column to `df` in place.
X_test = preprocess(df, dv)

In [36]:
# Ground truth for evaluation: the `duration` column computed by
# `read_dataframe`, extracted via `.values`.
target = "duration"
y_test = df[target].values

In [55]:
# Time one evaluation of the model version loaded from the "Production" stage.
%time test_model(name=model_name, stage="Production", X_test=X_test, y_test=y_test)

 - mlflow (current: 2.3.2, required: mlflow==2.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


CPU times: user 5.74 s, sys: 6.93 ms, total: 5.75 s
Wall time: 1.2 s


{'rmse': 4.136594651260994}

In [57]:
# Time one evaluation of the model version loaded from the "Staging" stage.
%time test_model(name=model_name, stage="Staging", X_test=X_test, y_test=y_test)

 - mlflow (current: 2.3.2, required: mlflow==2.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


CPU times: user 5.6 s, sys: 12.5 ms, total: 5.61 s
Wall time: 1.14 s


{'rmse': 4.136594651260994}

In [58]:
# Promote version 3 to "Production". `archive_existing_versions=True` asks
# MLflow to archive the versions currently in that stage.
# NOTE(review): the version number is hard-coded here although
# `model_version` holds the same value in an earlier cell - keep them in sync.
client.transition_model_version_stage(
    name=model_name,
    version=3,
    stage="Production",
    archive_existing_versions=True
)

<ModelVersion: aliases=[], creation_timestamp=1686308924679, current_stage='Production', description='The model verseion 3 was transitioned to Staging on 2023-06-09 13:19:44.267008', last_updated_timestamp=1686313543568, name='nyc-taxi-regressor', run_id='c96613bfe29f4522a8a46179f04e2681', run_link=None, source='/media/userl/Ubuntu-DataStora/Learning/MLOps_Zoomcamp/mlops-material/2-Experiment-Tracking/mlruns/1/c96613bfe29f4522a8a46179f04e2681/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>