# Get access to mlflow without UI

In [2]:
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

client = MlflowClient(tracking_uri = MLFLOW_TRACKING_URI)

In [3]:
client.search_experiments()

[<Experiment: artifact_location='/workspaces/mlops-zoomcamp/03-training/experiment_tracking/mlruns/2', creation_time=1739379215612, experiment_id='2', last_update_time=1739379215612, lifecycle_stage='active', name='test', tags={}>,
 <Experiment: artifact_location='/workspaces/mlops-zoomcamp/03-training/experiment_tracking/mlruns/1', creation_time=1738676613877, experiment_id='1', last_update_time=1738676613877, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/mlops-zoomcamp/03-training/experiment_tracking/mlruns/0', creation_time=1738676613873, experiment_id='0', last_update_time=1738676613873, lifecycle_stage='active', name='Default', tags={}>]

In [4]:
from mlflow.entities import ViewType

# can inspect the runs and apply filters to the search
runs = client.search_runs(
    experiment_ids='1',
    filter_string='metrics.rmse < 6.8',
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

In [6]:
runs[0]

<Run: data=<RunData: metrics={'rmse': 5.469224792417576}, params={'learning_rate': '0.29122519299691013',
 'max_depth': '87',
 'min_child_weight': '1.1251068416171426',
 'objective': 'reg:linear',
 'reg_alpha': '0.00716725908061934',
 'seed': '42'}, tags={'mlflow.log-model.history': '[{"run_id": "7ced78e294ba48c9904625c91346cb01", '
                             '"artifact_path": "models_mlflow", '
                             '"utc_time_created": "2025-02-12 '
                             '16:27:39.192719", "model_uuid": '
                             '"0e377b4742e14c3abbdcd0e4a7929441", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.13.1", '
                             '"data": "model.xgb", "env": {"conda": '
                             '"conda.yaml", "virtualenv": "python_env.yaml"}}, '
                             '"xgboost": {"xgb_version": "2.1.3", "data": '
                 

In [4]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: 7ced78e294ba48c9904625c91346cb01, rmse: 5.4692


# Promote models to model registry

In [5]:
import mlflow

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [6]:
run_id = "530b0b0f348a42549e4d21fb6910f057"
model_uri = f"runs:/{run_id}/models_mlflow"

mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1739447100283, current_stage='None', description=None, last_updated_timestamp=1739447100283, name='nyc-taxi-regressor', run_id='530b0b0f348a42549e4d21fb6910f057', run_link=None, source='/workspaces/mlops-zoomcamp/03-training/experiment_tracking/mlruns/1/530b0b0f348a42549e4d21fb6910f057/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [7]:
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, alias: {version.aliases}")

version: 2, alias: []
version: 3, alias: []


  latest_versions = client.get_latest_versions(name=model_name)


In [8]:
latest_versions

[<ModelVersion: aliases=[], creation_timestamp=1739442288681, current_stage='Staging', description=None, last_updated_timestamp=1739443552634, name='nyc-taxi-regressor', run_id='530b0b0f348a42549e4d21fb6910f057', run_link=None, source='/workspaces/mlops-zoomcamp/03-training/experiment_tracking/mlruns/1/530b0b0f348a42549e4d21fb6910f057/artifacts/models_mlflow', status='READY', status_message=None, tags={'model': 'sklearn'}, user_id=None, version=2>,
 <ModelVersion: aliases=[], creation_timestamp=1739447100283, current_stage='None', description=None, last_updated_timestamp=1739447100283, name='nyc-taxi-regressor', run_id='530b0b0f348a42549e4d21fb6910f057', run_link=None, source='/workspaces/mlops-zoomcamp/03-training/experiment_tracking/mlruns/1/530b0b0f348a42549e4d21fb6910f057/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=3>]

In [9]:
client.set_registered_model_alias(
    name = model_name,
    version = 1,
    alias = "challenger1"
)

In [10]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [11]:
def read_dataframe(filename):
    df = pd.read_csv(filename)
    df["duration"] = pd.to_datetime(df["tpep_dropoff_datetime"]) - pd.to_datetime(df["tpep_pickup_datetime"])
    df["duration"] = df["duration"].dt.total_seconds() / 60
    df = df[df["duration"] < 60]

    df.loc[:, "PULocationID"] = df["PULocationID"].astype(str)
    df.loc[:, "DOLocationID"] = df["DOLocationID"].astype(str)
    return df

def preprocess(df, dv):
    feature_dicts = df[['PULocationID', 'DOLocationID']].to_dict(orient='records')
    return dv.transform(feature_dicts)

def test_model(name, alias, x_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}@{alias}")
    y_pred = model.predict(x_test)
    return {"rmse": mean_squared_error(y_test, y_pred)**0.5}

In [12]:
test_df = read_dataframe("./data/green_tripdata_2024-03.csv")

  df = pd.read_csv(filename)
  df.loc[:, "PULocationID"] = df["PULocationID"].astype(str)
  df.loc[:, "DOLocationID"] = df["DOLocationID"].astype(str)


In [13]:
import pickle

with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

In [14]:
target = "duration"
y_test = test_df[target].to_numpy()

In [15]:
x_test = preprocess(test_df, dv)
x_test = xgb.DMatrix(x_test, label=y_test)

In [19]:
model = mlflow.xgboost.load_model(f"models:/{model_name}@challenger1")

In [21]:
y_pred = model.predict(x_test)

In [23]:
print({"rmse": mean_squared_error(y_test, y_pred)**0.5})

{'rmse': 5.782673051553634}


In [16]:
%time test_model(model_name, alias="challenger1", x_test=x_test, y_test=y_test)



TypeError: Not supported type for data.<class 'xgboost.core.DMatrix'>