In [2]:
from mlflow.tracking import MlflowClient

# SQLite database file that backs the MLflow tracking store for this notebook.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

# Client handle used by the cells below for experiment, run and registry calls.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [4]:
# List the experiments in the tracking store (default search, no filters passed).
client.search_experiments()

[<Experiment: artifact_location='./mlruns/1', creation_time=1666178202276, experiment_id='1', last_update_time=1666178202276, lifecycle_stage='active', name='nyc-taxi-duration-experiment', tags={}>,
 <Experiment: artifact_location='./mlruns/0', creation_time=None, experiment_id='0', last_update_time=None, lifecycle_stage='active', name='Default', tags={}>]

In [5]:
# Create the experiment only when it is missing: create_experiment raises if
# the name is already taken, which would break a Restart & Run All.
existing = client.get_experiment_by_name('test_experiment')
# The last expression displays the experiment id in either case.
existing.experiment_id if existing is not None else client.create_experiment(name='test_experiment')

'2'

In [9]:
from mlflow.entities import ViewType
# Up to five active runs of experiment '1' (nyc-taxi-duration-experiment)
# whose rmse is below 6, ordered from lowest to highest rmse.
client.search_runs(
    # The API documents a list of experiment-id strings, not a bare int.
    experiment_ids=['1'],
    filter_string='metrics.rmse < 6',
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=['metrics.rmse ASC'],
)

[<Run: data=<RunData: metrics={'best_iteration': 999.0,
  'rmse': 5.5807021969663255,
  'stopped_iteration': 999.0,
  'validation-rmse': 5.580702194003926}, params={'learning_rate': '0.1700079110741563',
  'max_depth': '16',
  'min_child_weight': '2.37717271395477',
  'objective': 'reg:linear',
  'reg_alpha': '0.363258077609188',
  'reg_lambda': '0.011733914718256189',
  'seed': '42'}, tags={'mlflow.log-model.history': '[{"run_id": "a7b398d09b174d1d93f0d99e30d0c19a", '
                              '"artifact_path": "model", "utc_time_created": '
                              '"2022-10-20 09:13:17.621195", "flavors": '
                              '{"python_function": {"loader_module": '
                              '"mlflow.xgboost", "python_version": "3.9.13", '
                              '"data": "model.xgb", "env": "conda.yaml"}, '
                              '"xgboost": {"xgb_version": "1.6.2", "data": '
                              '"model.xgb", "model_class": '
         

In [10]:
import mlflow
# Point the module-level mlflow API (used below for register_model and
# pyfunc.load_model) at the same store URI that `client` was created with.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [11]:
# Register the artifact stored under "model" of run
# a7b398d09b174d1d93f0d99e30d0c19a in the model registry.
# NOTE: when 'nyc-taxi-regressor' already exists, every execution of this
# cell adds a new version to it (see the log output below).
model_uri = 'runs:/a7b398d09b174d1d93f0d99e30d0c19a/model'
mlflow.register_model(model_uri=model_uri,name='nyc-taxi-regressor')

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
2022/10/20 09:58:33 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 3
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1666259910331, current_stage='None', description=None, last_updated_timestamp=1666259910331, name='nyc-taxi-regressor', run_id='a7b398d09b174d1d93f0d99e30d0c19a', run_link=None, source='./mlruns/1/a7b398d09b174d1d93f0d99e30d0c19a/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [12]:
# List registered models (default search, no filters passed); the repr of each
# entry includes its `latest_versions`.
client.search_registered_models()

[<RegisteredModel: creation_timestamp=1666258302312, description='', last_updated_timestamp=1666259910331, latest_versions=[<ModelVersion: creation_timestamp=1666258352060, current_stage='Staging', description='', last_updated_timestamp=1666258544017, name='nyc-taxi-regressor', run_id='696604acf6644efd9a32a16fd955bc91', run_link='', source='./mlruns/1/696604acf6644efd9a32a16fd955bc91/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=2>,
  <ModelVersion: creation_timestamp=1666259910331, current_stage='None', description=None, last_updated_timestamp=1666259910331, name='nyc-taxi-regressor', run_id='a7b398d09b174d1d93f0d99e30d0c19a', run_link=None, source='./mlruns/1/a7b398d09b174d1d93f0d99e30d0c19a/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>], name='nyc-taxi-regressor', tags={}>]

In [14]:
# Move version 3 of the registered model into the 'Staging' stage.
# archive_existing_versions=False leaves versions already in that stage as-is.
client.transition_model_version_stage(
    name='nyc-taxi-regressor',
    version=3,
    stage='Staging',
    archive_existing_versions=False,
)

<ModelVersion: creation_timestamp=1666259910331, current_stage='Staging', description=None, last_updated_timestamp=1666260584870, name='nyc-taxi-regressor', run_id='a7b398d09b174d1d93f0d99e30d0c19a', run_link=None, source='./mlruns/1/a7b398d09b174d1d93f0d99e30d0c19a/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [19]:
from sklearn.metrics import mean_squared_error
import pandas as pd

def clean(filename):
    """Read a trips parquet file and return the filtered, feature-ready frame.

    Steps:
      * add ``duration`` = dropoff - pickup, expressed in minutes;
      * keep rows with 1 <= duration <= 60, 1 < trip_distance < 25,
        1 < total_amount < 150 and passenger_count != 0;
      * add ``PU_DO_pair`` as "<PULocationID>_<DOLocationID>".

    Parameters
    ----------
    filename : path or file-like accepted by ``pd.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        The surviving rows (original index labels kept) with the two extra
        columns ``duration`` and ``PU_DO_pair``.
    """
    df = pd.read_parquet(filename)

    # Vectorised timedelta -> minutes conversion (no per-row Python lambda).
    elapsed = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df['duration'] = elapsed.dt.total_seconds() / 60

    # One combined mask instead of four successive re-slices.
    keep = (
        df.duration.between(1, 60)
        & (df.trip_distance > 1) & (df.trip_distance < 25)
        & (df.total_amount > 1) & (df.total_amount < 150)
        & (df['passenger_count'] != 0)
    )

    # Explicit copy: the column added below must not be written onto a slice
    # of the unfiltered frame (avoids pandas' chained-assignment warning).
    df = df[keep].copy()
    df['PU_DO_pair'] = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)
    return df

def preprocess(df,scaler,dv):
    """Turn a cleaned trips frame into the model's feature matrix and target.

    The caller's DataFrame is left untouched, so the function can be re-run on
    the same frame without scaling the numerical columns a second time.

    Parameters
    ----------
    df : DataFrame holding 'PU_DO_pair', 'trip_distance', 'fare_amount' and
        'duration' columns.
    scaler : fitted object whose ``transform`` scales the numerical columns.
    dv : fitted object whose ``transform`` accepts a list of record dicts.

    Returns
    -------
    (X, y) : ``dv.transform`` output for the feature records, and the
        ``duration`` column as an array.
    """
    categorical = ['PU_DO_pair']
    numerical = ['trip_distance','fare_amount']
    target = 'duration'

    # Work on a private copy of the feature columns; the original code scaled
    # `df` in place, which made the calling cell non-idempotent.
    features = df[categorical + numerical].copy()
    # Pre Processing - Numerical
    features[numerical] = scaler.transform(features[numerical])
    # Pre Processing - Categorical
    features[categorical] = features[categorical].astype(str)

    records = features.to_dict(orient='records')
    X = dv.transform(records)
    y = df[target].values
    return X,y
    
def test_model(name, stage, X_test, y_test):
    """Load a registry model by name and stage and score it on a test set.

    Parameters
    ----------
    name : registered model name.
    stage : registry stage used in the ``models:/{name}/{stage}`` URI.
    X_test : feature matrix passed to the model's ``predict``.
    y_test : true target values (1-D).

    Returns
    -------
    dict
        ``{'rmse': <root mean squared error of the predictions>}``.
    """
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    # Take the square root explicitly rather than passing squared=False:
    # that keyword is deprecated in newer scikit-learn releases, and for a
    # 1-D target the result is identical.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    return {'rmse': rmse}


In [18]:
# Load and clean the trips file; the result feeds X_test / y_test below.
df = clean('./data/green_tripdata_2021-03.parquet')

In [20]:
import pickle

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# artifacts that come from a trusted source.
# Scaler applied to the numerical columns inside preprocess().
with open('./models/scaler.b','rb') as f_in:
    scaler = pickle.load(f_in)

# Vectorizer that turns the per-row feature dicts into the model input.
with open('./models/vectorizer.b','rb') as f_in:
    dv = pickle.load(f_in)

# Build the evaluation features and target from the cleaned frame.
X_test, y_test = preprocess(df,scaler,dv)

In [23]:
# Time one load-and-predict pass of the 'Staging' model and show its RMSE.
%time test_model(name='nyc-taxi-regressor',stage='Staging',X_test=X_test,y_test=y_test)

CPU times: user 5.51 s, sys: 0 ns, total: 5.51 s
Wall time: 571 ms


{'rmse': 5.579155130005743}