In [4]:
from mlflow.tracking import MlflowClient

# Backend store used for both tracking and the model registry in this notebook:
# a local SQLite database file named mlexp.db.
MLFLOW_TRACKING_URI = "sqlite:///mlexp.db"

In [5]:
# Low-level client bound to the store named by MLFLOW_TRACKING_URI; every
# registry/tracking call below goes through this object.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

# Last expression of the cell, so its result (the experiments in the store) is displayed.
client.search_experiments()

[<Experiment: artifact_location='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/2', creation_time=1741022819579, experiment_id='2', last_update_time=1741022819579, lifecycle_stage='active', name='my_cool_experiment', tags={}>,
 <Experiment: artifact_location='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/1', creation_time=1740471480731, experiment_id='1', last_update_time=1740471480731, lifecycle_stage='active', name='nyctaxi-experiment', tags={}>,
 <Experiment: artifact_location='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/0', creation_time=1740471480655, experiment_id='0', last_update_time=1740471480655, lifecycle_stage='active', name='Default', tags={}>]

In [5]:
# Make the cell safe to re-run: creating an experiment whose name already
# exists fails, so look it up first and only create it when it is missing.
# Either way the cell ends by displaying the experiment ID, as before.
EXPERIMENT_NAME = "new_trial_exp"
existing_experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
experiment_id = (
    existing_experiment.experiment_id
    if existing_experiment is not None
    else client.create_experiment(EXPERIMENT_NAME)
)
experiment_id

'3'

In [10]:
from mlflow.entities import ViewType

# Fetch the best candidates from experiment '1' (listed as 'nyctaxi-experiment'
# in the search_experiments output above), ranked by validation error.
runs = client.search_runs(
    experiment_ids='1',
    filter_string='metrics.rmse<8',  # only runs whose logged rmse is below 8
    run_view_type=ViewType.ACTIVE_ONLY,  # restrict to the active view of runs
    max_results=5,  # cap the result at five runs
    order_by=['metrics.rmse ASC']  # lowest rmse first

)

In [11]:
# One line per selected run: its ID and its rmse rounded to four decimals.
for candidate in runs:
    candidate_id = candidate.info.run_id
    candidate_rmse = candidate.data.metrics['rmse']
    print(f"run id:{candidate_id},rmse:{candidate_rmse:.4f}")

run id:1afa885598694e3a85fcff3264b7b741,rmse:5.2943
run id:7ecb816404814450a1a899e8b843dc1c,rmse:5.3546
run id:712af05048b046bd865692ae6a05a21e,rmse:5.3860
run id:4fc720eb361048a697dfae8f888d4965,rmse:5.5316
run id:11877bd07a354104b81245a8a73e2154,rmse:5.5870


In [12]:
import mlflow
# Point the module-level mlflow API at the same store the MlflowClient uses,
# so mlflow.register_model / mlflow.pyfunc.load_model resolve against it.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [20]:
# Run whose model gets registered; the ID is hard-coded rather than taken from `runs`.
run_id = "7ecb816404814450a1a899e8b843dc1c"
# 'models_mlflow' is the artifact sub-path of the run — presumably where the
# training code logged the model; TODO confirm against the training notebook.
model_uri = f"runs:/{run_id}/models_mlflow"
# Per the cell output, registering under an existing name creates a new version,
# so every re-run of this cell adds another version of 'nyc-taxi-regressor'.
mlflow.register_model(model_uri=model_uri,name='nyc-taxi-regressor')

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '6' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1741713393051, current_stage='None', description=None, last_updated_timestamp=1741713393051, name='nyc-taxi-regressor', run_id='7ecb816404814450a1a899e8b843dc1c', run_link=None, source='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/1/7ecb816404814450a1a899e8b843dc1c/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=6>

In [23]:
# Display the registered models in the registry; the repr of each entry also
# embeds its latest versions, so the output is long.
client.search_registered_models()

[<RegisteredModel: aliases={}, creation_timestamp=1741027340141, description='Linear regression model', last_updated_timestamp=1741027986853, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1741027340320, current_stage='None', description=None, last_updated_timestamp=1741027340320, name='base-linear-regressor', run_id='c503c114452f4ab08f2e1a421f7394c6', run_link=None, source='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/2/c503c114452f4ab08f2e1a421f7394c6/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>], name='base-linear-regressor', tags={}>,
 <RegisteredModel: aliases={}, creation_timestamp=1741688743947, description='NYC taxi trip predictor for trip duration', last_updated_timestamp=1741713393051, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1741688764425, current_stage='Staging', description='ridge regressor', last_updated_timestamp=1741690521453, name='nyc-taxi-regressor', run_id='5a2ea0d1f046462a8fe7

In [30]:
# Registered model inspected and promoted in the remaining cells.
model_name = 'nyc-taxi-regressor'
latest_versions = client.get_latest_versions(name=model_name)

# Report the version number and current stage of every entry returned.
for registered_version in latest_versions:
    version_number = registered_version.version
    stage_label = registered_version.current_stage
    print(f"version:{version_number},stage:{stage_label}")

version:6,stage:Staging
version:5,stage:None


  latest_versions = client.get_latest_versions(name= model_name)


In [33]:
# Version to promote and the stage it should move to; both names are reused
# later in the notebook (description text, Production transition).
model_version = 6 
new_stage = 'Staging'
# Move the chosen version into `new_stage`.
# NOTE(review): the echoed source line in this cell's output suggests the call
# emits a warning — presumably a deprecation notice for stage transitions; confirm.
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False  # do not archive versions already in that stage
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1741713393051, current_stage='Staging', description=None, last_updated_timestamp=1741715099602, name='nyc-taxi-regressor', run_id='7ecb816404814450a1a899e8b843dc1c', run_link=None, source='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/1/7ecb816404814450a1a899e8b843dc1c/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=6>

In [34]:
from datetime import datetime
# Today's calendar date (no time component), embedded in the description below.
date = datetime.today().date()

# Record on the model version itself which stage it was moved to and when.
# The text is built from `model_version` and `new_stage`, so it only reflects
# the transition those variables describe at the time this cell runs.
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} has been transitioned to {new_stage} on {date}"

)

<ModelVersion: aliases=[], creation_timestamp=1741713393051, current_stage='Staging', description='The model version 6 has been transitioned to Staging on 2025-03-11', last_updated_timestamp=1741715108009, name='nyc-taxi-regressor', run_id='7ecb816404814450a1a899e8b843dc1c', run_link=None, source='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/1/7ecb816404814450a1a899e8b843dc1c/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=6>

In [55]:
from sklearn.metrics import root_mean_squared_error
from sklearn.feature_extraction import DictVectorizer
import pandas as pd

def read_dataframe(filename, sample_frac=0.2, random_state=42):
    """Load a taxi-trip file and return a cleaned random sample of it.

    Parameters
    ----------
    filename : str
        Path to a ``.csv`` or ``.parquet`` file containing at least the columns
        ``tpep_pickup_datetime``, ``tpep_dropoff_datetime``, ``PULocationID``
        and ``DOLocationID``.
    sample_frac : float, default 0.2
        Fraction of rows kept by the random sample.
    random_state : int, default 42
        Seed for the sample, so repeated calls return the same rows.

    Returns
    -------
    pandas.DataFrame
        The sampled rows whose trip ``duration`` (in minutes) lies in
        [1, 60], with the two location-ID columns converted to strings.

    Raises
    ------
    ValueError
        If ``filename`` ends with neither ``.csv`` nor ``.parquet``.
    """
    if filename.endswith('.csv'):
        df_full = pd.read_csv(filename)
        needs_datetime_parsing = True
    elif filename.endswith('.parquet'):
        df_full = pd.read_parquet(filename)
        # Parquet columns are used as stored (assumed to be datetime-typed already).
        needs_datetime_parsing = False
    else:
        # Previously this fell through and failed later on an undefined `df`.
        raise ValueError(
            f"Unsupported file type for {filename!r}: expected a .csv or .parquet file"
        )

    df = df_full.sample(frac=sample_frac, random_state=random_state)

    if needs_datetime_parsing:
        # CSV stores timestamps as text, so parse them before subtracting.
        df['tpep_dropoff_datetime'] = pd.to_datetime(df['tpep_dropoff_datetime'])
        df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])

    # Trip duration in minutes, computed column-wise instead of per-row apply.
    trip_time = df['tpep_dropoff_datetime'] - df['tpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # Keep trips of 1 to 60 minutes (both bounds inclusive); copy so the column
    # assignment below writes to an independent frame, not a view of `df`.
    df = df[df['duration'].between(1, 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

def preprocess(df,dv):
    """Turn trip rows into the feature matrix produced by ``dv``.

    Adds a ``PU_DO`` column to ``df`` in place (pickup and drop-off IDs joined
    by an underscore), then passes one record per row, holding ``PU_DO`` and
    ``trip_distance``, to ``dv.transform`` and returns its result.
    """
    pickup_ids = df['PULocationID']
    dropoff_ids = df['DOLocationID']
    df['PU_DO'] = pickup_ids + '_' + dropoff_ids

    feature_columns = ['PU_DO', 'trip_distance']
    records = df[feature_columns].to_dict(orient='records')
    return dv.transform(records)

def test_model(name,stage,X_test,y_test):
    """Score a registered model on a test set.

    Loads the model addressed by ``models:/<name>/<stage>`` through
    ``mlflow.pyfunc``, predicts on ``X_test`` and returns a dict with a single
    key ``'rmse'`` holding the root mean squared error against ``y_test``.
    """
    registry_uri = f"models:/{name}/{stage}"
    loaded_model = mlflow.pyfunc.load_model(registry_uri)
    predictions = loaded_model.predict(X_test)
    rmse = root_mean_squared_error(y_test, predictions)
    return {'rmse': rmse}


In [50]:
# Load the March 2024 yellow-taxi trips; this frame is used below as the test set.
df = read_dataframe("./data/yellow_tripdata_2024-03.parquet")

In [38]:
# Copy the run's 'preprocessor' artifact folder into the current directory;
# the returned local path is displayed as the cell output.
client.download_artifacts(run_id=run_id,path='preprocessor',dst_path='.')

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'd:\\ML\\zoomcamp\\02-Experiment-tracking\\preprocessor'

In [51]:
import pickle

# Restore the preprocessor object that was downloaded from the run's artifacts.
# NOTE(review): pickle.load can execute arbitrary code, so only unpickle
# artifacts that come from a tracking store you trust.
with open("preprocessor/preprocessor.b","rb") as f_in:
    dv = pickle.load(f_in)

In [52]:
# Build the test feature matrix with the restored preprocessor (presumably a
# fitted DictVectorizer — confirm). Side effect: preprocess() also adds a
# 'PU_DO' column to `df`.
X_test = preprocess(df,dv)

In [53]:
# Ground truth for evaluation: the 'duration' column computed by read_dataframe (minutes).
target="duration"
y_test = df[target].values

In [56]:
# Time one evaluation of the model currently in the 'Staging' stage on the test set.
%time test_model(name=model_name,stage='Staging',X_test=X_test,y_test=y_test)

  latest = client.get_latest_versions(name, None if stage is None else [stage])
 - mlflow (current: 2.20.3, required: mlflow==2.20.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


CPU times: total: 8.09 s
Wall time: 2.8 s


{'rmse': 5.698330495375579}

In [57]:
# Promote the evaluated version to 'Production' and archive whatever versions
# were already in that stage.
# NOTE(review): the version's description is not refreshed here — the cell
# output still reads "transitioned to Staging"; consider updating it afterwards.
client.transition_model_version_stage(model_name,model_version,stage='Production',
                                      archive_existing_versions=True)

  client.transition_model_version_stage(model_name,model_version,stage='Production',


<ModelVersion: aliases=[], creation_timestamp=1741713393051, current_stage='Production', description='The model version 6 has been transitioned to Staging on 2025-03-11', last_updated_timestamp=1741718493782, name='nyc-taxi-regressor', run_id='7ecb816404814450a1a899e8b843dc1c', run_link=None, source='file:///d:/ML/zoomcamp/02-Experiment-tracking/mlruns/1/7ecb816404814450a1a899e8b843dc1c/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=6>