In [1]:
from mlflow.tracking import MlflowClient


MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"


client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)


<h2> Access MLFlow tracking server using MlflowClient Python </h2>

In [29]:
import pandas as pd

In [3]:
client.list_experiments()

[<Experiment: artifact_location='./mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='first-mlflow-experiement', tags={}>]

In [4]:
client.create_experiment('new-experiment')

'2'

In [12]:
from mlflow.entities import ViewType
runs = client.search_runs(experiment_ids='1',filter_string="metrics.RMSE < 6.9",run_view_type=ViewType.ACTIVE_ONLY,max_results=5,order_by=["metrics.rmse ASC"])

In [13]:
for run in runs:
    print(f" run id :{run.info.run_id}, RMSE : {run.data.metrics['RMSE']:.4f}")

 run id :a21214114c204c569838eb19b17ad41f, RMSE : 6.8243
 run id :ccba245e9d1440f684def52458696058, RMSE : 6.8243
 run id :243656341ba743c1866135dc1e56632a, RMSE : 6.8818
 run id :ab72fe0e343a4299971bd0d61aee26dd, RMSE : 6.8666
 run id :3b8bca8fbf62419f8320cb4a7bc6407b, RMSE : 6.8586


In [15]:
import mlflow

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

run_id = 'ccba245e9d1440f684def52458696058'
mlflow.register_model(model_uri=f'runs:/{run_id}/model',name='nyc-taxi-regressor')

Successfully registered model 'nyc-taxi-regressor'.
2022/05/29 16:46:01 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 1
Created version '1' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1653822961683, current_stage='None', description=None, last_updated_timestamp=1653822961683, name='nyc-taxi-regressor', run_id='ccba245e9d1440f684def52458696058', run_link=None, source='./mlruns/1/ccba245e9d1440f684def52458696058/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [17]:
client.list_registered_models()

[<RegisteredModel: creation_timestamp=1653822960858, description=None, last_updated_timestamp=1653822961683, latest_versions=[<ModelVersion: creation_timestamp=1653822961683, current_stage='None', description=None, last_updated_timestamp=1653822961683, name='nyc-taxi-regressor', run_id='ccba245e9d1440f684def52458696058', run_link=None, source='./mlruns/1/ccba245e9d1440f684def52458696058/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>], name='nyc-taxi-regressor', tags={}>,
 <RegisteredModel: creation_timestamp=1653822030395, description='', last_updated_timestamp=1653822143397, latest_versions=[<ModelVersion: creation_timestamp=1653822031804, current_stage='Staging', description='', last_updated_timestamp=1653822143397, name='nyc-taxi-rides', run_id='a21214114c204c569838eb19b17ad41f', run_link='', source='./mlruns/1/a21214114c204c569838eb19b17ad41f/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=1>], 

In [18]:
client.get_latest_versions(name='nyc-taxi-regressor')

[<ModelVersion: creation_timestamp=1653822961683, current_stage='None', description=None, last_updated_timestamp=1653822961683, name='nyc-taxi-regressor', run_id='ccba245e9d1440f684def52458696058', run_link=None, source='./mlruns/1/ccba245e9d1440f684def52458696058/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>]

In [22]:
for version in client.get_latest_versions(name='nyc-taxi-regressor'):
    print(f'version : {version.version}, run_id : {version.run_id} , current_stage : {version.current_stage}')
    #print(version.current_stage)

version : 1, run_id : ccba245e9d1440f684def52458696058 , current_stage : None


<h2> Adding model to model registry and adding annotations  </h2>

In [24]:
client.transition_model_version_stage(name='nyc-taxi-regressor',archive_existing_versions=False,version=1,stage="Staging")

<ModelVersion: creation_timestamp=1653822961683, current_stage='Staging', description=None, last_updated_timestamp=1653823405215, name='nyc-taxi-regressor', run_id='ccba245e9d1440f684def52458696058', run_link=None, source='./mlruns/1/ccba245e9d1440f684def52458696058/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [27]:
from datetime import datetime

date = datetime.today().date()
date

datetime.date(2022, 5, 29)

In [28]:
model_version = 1
stage = 'Staging'
client.update_model_version(name='nyc-taxi-regressor',version=1,description=f"The model version {model_version} transitioned to {stage} on {date}")

<ModelVersion: creation_timestamp=1653822961683, current_stage='Staging', description='The model version 1 transitioned to Staging on 2022-05-29', last_updated_timestamp=1653823721735, name='nyc-taxi-regressor', run_id='ccba245e9d1440f684def52458696058', run_link=None, source='./mlruns/1/ccba245e9d1440f684def52458696058/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

<h2> Retrieving  the model registered in model registry and evaluate the results </h2> 

In [38]:
def read_dataframe(file_name):
    
    data = pd.read_parquet(file_name)
    
    data['Duration'] = data.lpep_dropoff_datetime - data.lpep_pickup_datetime
    
    data.Duration = data.Duration.apply(lambda dt: dt.total_seconds()/60)
    
    data = data[(data.Duration >= 1) & (data.Duration <= 60)]
    
    categorical = ['PULocationID','DOLocationID']
    
    data[categorical] = data[categorical].astype(str)
    
    return data

In [49]:
def preprocess(df,dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    
    
    dicts = df[categorical+numerical].to_dict(orient='records')
    
    return dv.transform(dicts)

In [52]:
from sklearn.metrics import mean_squared_error

In [55]:
def test_model(x,y,name,stage):
    load_model = mlflow.pyfunc.load_model(f'models:/{name}/{stage}')
    y_pred = load_model.predict(x)
    return {'RMSE' : mean_squared_error(y,y_pred,squared=False) }

In [39]:
data = read_dataframe('data/green_tripdata_2021-03.parquet')
data.head()

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,extra,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,Duration
0,2,2021-03-01 00:05:42,2021-03-01 00:14:03,N,1.0,83,129,1.0,1.56,7.5,0.5,0.5,0.0,0.0,,0.3,8.8,1.0,1.0,0.0,8.35
1,2,2021-03-01 00:21:03,2021-03-01 00:26:17,N,1.0,243,235,1.0,0.96,6.0,0.5,0.5,0.0,0.0,,0.3,7.3,2.0,1.0,0.0,5.233333
2,2,2021-03-01 00:02:06,2021-03-01 00:22:26,N,1.0,75,242,1.0,9.93,28.0,0.5,0.5,2.0,0.0,,0.3,31.3,1.0,1.0,0.0,20.333333
3,2,2021-03-01 00:24:03,2021-03-01 00:31:43,N,1.0,242,208,1.0,2.57,9.5,0.5,0.5,0.0,0.0,,0.3,10.8,2.0,1.0,0.0,7.666667
4,1,2021-03-01 00:11:10,2021-03-01 00:14:46,N,1.0,41,151,1.0,0.8,5.0,0.5,0.5,1.85,0.0,,0.3,8.15,1.0,1.0,0.0,3.6


In [44]:
client.download_artifacts(run_id='a21214114c204c569838eb19b17ad41f',path='Preprocessors',dst_path='.')

'D:\\mlops-zoomcamp\\03-training\\Preprocessors'

In [45]:
import pickle

with open('D:\\mlops-zoomcamp\\03-training\\Preprocessors/preprocessor.b','rb') as f:
    dv = pickle.load(f)
dv

In [50]:
preprocess_data = preprocess(data,dv)

In [51]:
y_label = data.Duration.values

In [56]:
name = 'nyc-taxi-regressor'
stage = 'Staging'
%time test_model(preprocess_data,y_label,name,stage)

CPU times: total: 20.3 s
Wall time: 3.22 s


{'RMSE': 6.745857322834793}