In [1]:
import mlflow
from mlflow.tracking import MlflowClient

# SQLite database file used as the MLflow backend for this notebook.
mlflow_tracking_uri = "sqlite:///mlflow.db"

# Point the global (fluent) MLflow API at the same backend as the client below.
# Without this, `mlflow.pyfunc.load_model("models:/...")` resolves registry URIs
# against whatever tracking URI MLflow falls back to (environment/default),
# not against mlflow.db.
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Explicit client used for registry queries (e.g. listing model versions).
client = MlflowClient(tracking_uri=mlflow_tracking_uri)

In [39]:
from sklearn.metrics import mean_squared_error
import pandas as pd

def read_DataFrame(filename):
    """Read a trip parquet file and derive the columns used for evaluation.

    Steps performed on the loaded frame:
      * parse ``lpep_pickup_datetime`` / ``lpep_dropoff_datetime`` as datetimes;
      * add ``duration`` = drop-off minus pickup, expressed in minutes;
      * add ``PU_DO`` = ``"<PULocationID>_<DOLocationID>"``;
      * cast ``PULocationID`` and ``DOLocationID`` to strings;
      * keep only rows with ``1 < duration <= 60``.

    Args:
        filename: Path (or anything ``pd.read_parquet`` accepts) of the file.

    Returns:
        A new, independent ``DataFrame`` holding the filtered rows.
    """
    df = pd.read_parquet(filename)

    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])
    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])

    # Vectorised conversion to minutes; unlike a row-wise apply this also yields
    # a float column when the frame is empty, so the filter below keeps working.
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    df['PU_DO'] = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    keep = (df['duration'] > 1) & (df['duration'] <= 60)
    # Return a real copy so callers can add columns without SettingWithCopyWarning.
    return df.loc[keep].copy()

def preprocessing_data(df, dv):
    """Vectorize trip records with an already-built preprocessor.

    Builds the combined ``PU_DO`` feature (``"<PULocationID>_<DOLocationID>"``),
    pairs it with ``trip_distance``, converts the rows to a list of dicts and
    passes them to ``dv.transform``.

    The input frame is left untouched: the feature columns are assembled on a
    separate frame instead of being written back into ``df``.

    Args:
        df: DataFrame with ``PULocationID``, ``DOLocationID`` and
            ``trip_distance`` columns.
        dv: Object exposing ``transform(list_of_dicts)``.

    Returns:
        Whatever ``dv.transform`` returns for the record dicts.
    """
    categorical = ['PU_DO']
    numerical = ['trip_distance']

    pu_do = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)
    # assign() works on the new frame produced by the column selection, so the
    # caller's DataFrame is not mutated.
    features = df[numerical].assign(PU_DO=pu_do)[categorical + numerical]

    records = features.to_dict(orient='records')
    return dv.transform(records)


def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

def test_model_dt(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}


In [3]:
# Load and clean the trip file (2023-03 per its name); this frame supplies both
# the model inputs and the ground-truth durations used for evaluation below.
df = read_DataFrame("../data/green_tripdata_2023-03.parquet")

In [4]:
import pickle

# Restore the pickled preprocessor object whose `.transform` is called by
# `preprocessing_data`.
# NOTE(review): pickle.load can execute arbitrary code — only load this file
# if it comes from a trusted source.
with open("models/preprocessor.b", 'rb') as f_in:
    dv= pickle.load(f_in)

In [5]:
# Vectorize the trip records with the loaded preprocessor; the result is what
# gets passed to the model's `predict`.
X_test = preprocessing_data(df, dv)

In [6]:
# Ground truth for scoring: the `duration` column (minutes) computed in
# read_DataFrame, as a plain array.
target = 'duration'
y_test = df[target].values

In [None]:
import mlflow.pyfunc

# Registered model to load, and the alias that selects which version of it.
model_name = "Nyc_taxi_Arrival"
#model_name = "sk-learn-random-forest-reg-model"
alias = "challenger"

# Load the model through its alias using the `models:/<name>@<alias>` URI form.
# NOTE(review): the variable is named `champion_version`, yet the alias loaded is
# "challenger" and the object returned is a pyfunc model rather than a version —
# confirm which alias/name was intended.
champion_version = mlflow.pyfunc.load_model(f"models:/{model_name}@{alias}")

In [None]:
%time test_model_dt(name= "nyc_taxi_duration_first_exp ", stage="Production", X_test=X_test, y_test=y_test)

In [11]:
# Ask the registry for the latest versions of this model and print each one's
# version number, current stage and model name.
# NOTE(review): stage-based registry calls such as `get_latest_versions` appear to be
# deprecated in recent MLflow releases (the cell output echoes this line, which
# looks like a warning) — confirm and consider migrating to aliases.
model_name = "Nyc_taxi_Arrival"
latest_verions = client.get_latest_versions(name=model_name)

for version in latest_verions:
    print(f"version: {version.version}, stage:{version.current_stage}, model_name:{version.name}")

version: 4, stage:Staging, model_name:Nyc_taxi_Arrival
version: 2, stage:Production, model_name:Nyc_taxi_Arrival


  latest_verions = client.get_latest_versions(name=model_name)
