In [41]:
import mlflow
from mlflow.tracking import MlflowClient

# SQLite database file used as the MLflow tracking URI throughout this notebook.
mlflow_tracking_uri = "sqlite:///mlflow.db"
# Client bound to that URI; later cells use it to query runs and manage model versions.
client = MlflowClient(tracking_uri=mlflow_tracking_uri)

In [6]:

# List the experiments returned by the client; the list is shown as the cell result.
client.search_experiments()

[<Experiment: artifact_location='/workspaces/Arrival_time_estimation/mlflow/mlruns/1', creation_time=1724119947795, experiment_id='1', last_update_time=1724119947795, lifecycle_stage='active', name='nyc_taxi_duration_first_exp', tags={}>]

In [7]:
# Create 'second_experiment' only when it does not exist yet, so that
# re-running the notebook does not fail on the duplicate experiment name.
# Either way the cell result is the experiment id.
existing_experiment = client.get_experiment_by_name('second_experiment')
(
    existing_experiment.experiment_id
    if existing_experiment is not None
    else client.create_experiment(name='second_experiment')
)

'2'

In [38]:
from mlflow.entities import ViewType

# Fetch at most five active (non-deleted) FINISHED runs of experiment '1',
# ordered by ascending RMSE so the best run comes first.
runs = client.search_runs(
    experiment_ids='1',
    run_view_type=ViewType.ACTIVE_ONLY,
    filter_string="status='FINISHED' ",
    order_by=["metrics.rmse ASC"],
    max_results=5,
)

In [39]:
# Print one line per selected run: its id and its RMSE with four decimals.
for run in runs:
    current_run_id = run.info.run_id
    current_rmse = run.data.metrics['rmse']
    print(f"run id:{current_run_id}, rmse:{current_rmse:.4f}")

run id:5cde0dbba36e4d55b9d72b2f88c561c2, rmse:4.9727
run id:8acfbf48b0c84198b5729f17e47f546c, rmse:4.9727
run id:0d391e7e8bee4c9697f78817b28b498c, rmse:4.9727
run id:a7e6f7a8457941d4a30b63d4d15bdd34, rmse:4.9727
run id:c87d607c6da741eea74565ceea1f4659, rmse:4.9799


In [42]:
# Point the module-level mlflow API at the same tracking URI the client uses.
mlflow.set_tracking_uri(mlflow_tracking_uri)

In [57]:
# Register the "model" artifact of the chosen run under the registered-model
# name "nyc_taxi_duration_first_exp". The call is the last expression, so the
# returned ModelVersion is displayed as the cell result.
run_id = "a7e6f7a8457941d4a30b63d4d15bdd34"
model_uri = f"runs:/{run_id}/model"

mlflow.register_model(
    name="nyc_taxi_duration_first_exp",
    model_uri=model_uri,
)

Registered model 'nyc_taxi_duration_first_exp' already exists. Creating a new version of this model...
Created version '2' of model 'nyc_taxi_duration_first_exp'.


<ModelVersion: aliases=[], creation_timestamp=1724454664819, current_stage='None', description=None, last_updated_timestamp=1724454664819, name='nyc_taxi_duration_first_exp', run_id='a7e6f7a8457941d4a30b63d4d15bdd34', run_link=None, source='/workspaces/Arrival_time_estimation/mlflow/mlruns/1/a7e6f7a8457941d4a30b63d4d15bdd34/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [56]:
# Register the "model" artifact of the chosen run under the registered-model
# name "Nyc_taxi_Arrival". The call is the last expression, so the returned
# ModelVersion is displayed as the cell result.
run_id = "c87d607c6da741eea74565ceea1f4659"
model_uri = f"runs:/{run_id}/model"

mlflow.register_model(
    name="Nyc_taxi_Arrival",
    model_uri=model_uri,
)

Registered model 'Nyc_taxi_Arrival' already exists. Creating a new version of this model...
Created version '3' of model 'Nyc_taxi_Arrival'.


<ModelVersion: aliases=[], creation_timestamp=1724454594949, current_stage='None', description=None, last_updated_timestamp=1724454594949, name='Nyc_taxi_Arrival', run_id='c87d607c6da741eea74565ceea1f4659', run_link=None, source='/workspaces/Arrival_time_estimation/mlflow/mlruns/1/c87d607c6da741eea74565ceea1f4659/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [55]:
# Ask the registry for the latest versions of the registered model and print
# each version number together with the stage it is currently in.
model_name = "Nyc_taxi_Arrival"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage:{version.current_stage}")

version: 2, stage:Staging


  latest_verions = client.get_latest_versions(name=model_name)


In [65]:
# Move one version of the registered model into a new stage, leaving any
# version that already sits in that stage untouched (no archiving).
# `new_stage` is read again by a later cell.
model_name = "nyc_taxi_duration_first_exp"
model_version = 2
new_stage = "Staging"

client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False,
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1724454664819, current_stage='Staging', description=None, last_updated_timestamp=1724455222814, name='nyc_taxi_duration_first_exp', run_id='a7e6f7a8457941d4a30b63d4d15bdd34', run_link=None, source='/workspaces/Arrival_time_estimation/mlflow/mlruns/1/a7e6f7a8457941d4a30b63d4d15bdd34/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [60]:
from datetime import datetime

In [66]:
# Record on the model version when it was moved to its stage.
# `new_stage` is the value assigned in the stage-transition cell.
date = datetime.today().date()
model_version = 2
# Fixed: this literal used to end with a stray space and was never passed to
# the call, which instead relied on `model_name` left over from another cell.
model_name_2 = "nyc_taxi_duration_first_exp"
client.update_model_version(
    name=model_name_2,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}"
)

<ModelVersion: aliases=[], creation_timestamp=1724454664819, current_stage='Staging', description='The model version 2 was transitioned to Staging on 2024-08-23', last_updated_timestamp=1724455237741, name='nyc_taxi_duration_first_exp', run_id='a7e6f7a8457941d4a30b63d4d15bdd34', run_link=None, source='/workspaces/Arrival_time_estimation/mlflow/mlruns/1/a7e6f7a8457941d4a30b63d4d15bdd34/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [67]:
from sklearn.metrics import mean_squared_error
import pandas as pd

def read_DataFrame(filename):
    """Load a trip parquet file and derive the columns used for modelling.

    Steps performed on the loaded frame:
      * parse ``lpep_pickup_datetime`` / ``lpep_dropoff_datetime`` as datetimes;
      * add ``duration`` = drop-off minus pick-up, in minutes;
      * add ``PU_DO`` = "<PULocationID>_<DOLocationID>";
      * cast ``PULocationID`` and ``DOLocationID`` to strings;
      * keep only rows with ``1 < duration <= 60``.

    Args:
        filename: Path (or anything ``pandas.read_parquet`` accepts) of the
            parquet file to load.

    Returns:
        A new, independent DataFrame (a copy, not a view of the unfiltered
        frame) containing the surviving rows and the derived columns.
    """
    df = pd.read_parquet(filename)

    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])
    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])

    # Vectorised conversion to minutes instead of a per-row Python lambda.
    trip_length = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_length.dt.total_seconds() / 60

    # Built from the raw ids; the result is the same string either way.
    df['PU_DO'] = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    in_range = (df['duration'] > 1) & (df['duration'] <= 60)
    # .copy() so callers can add columns without SettingWithCopyWarning.
    return df[in_range].copy()

def preprocessing_data(df, dv):
    """Turn trip records into model features using an already fitted vectorizer.

    Args:
        df: DataFrame with ``PULocationID``, ``DOLocationID`` and
            ``trip_distance`` columns. It is not modified.
        dv: Fitted vectorizer exposing ``transform(list_of_dicts)``. It is used
            as passed in; it must not be re-created here, otherwise the
            feature mapping learned at training time would be lost.

    Returns:
        Whatever ``dv.transform`` returns for the per-row feature dicts
        (keys ``PU_DO`` and ``trip_distance``).
    """
    categorical = ['PU_DO']
    numerical = ['trip_distance']

    # assign() builds a new frame, so the caller's DataFrame stays untouched.
    features = df.assign(
        PU_DO=df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)
    )

    # Use the frame that was passed in, not a notebook-level global.
    records = features[categorical + numerical].to_dict(orient='records')
    return dv.transform(records)


def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [70]:
# Load and clean the 2023-03 trip file through read_DataFrame.
df = read_DataFrame("../data/green_tripdata_2023-03.parquet")

In [72]:
# Download the run's "preprocessor" artifact path into the current directory;
# the call is the last expression, so the local path is shown as the cell result.
run_id = '5cde0dbba36e4d55b9d72b2f88c561c2'
client.download_artifacts(
    run_id=run_id,
    path='preprocessor',
    dst_path='.',
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/workspaces/Arrival_time_estimation/mlflow/preprocessor'

In [77]:
import pickle

# SECURITY: unpickling can execute arbitrary code contained in the file, so
# only load preprocessor artifacts that come from runs you trust.
with open("preprocessor/preprocessor.b", 'rb') as f_in:
    payload = f_in.read()

# An empty file would otherwise surface as the opaque
# "EOFError: Ran out of input"; fail with an actionable message instead
# (same exception type as before).
if not payload:
    raise EOFError(
        "preprocessor/preprocessor.b is empty - re-log or re-download the "
        "preprocessor artifact before loading it"
    )

dv = pickle.loads(payload)

EOFError: Ran out of input