In [1]:

!python -V

Python 3.11.11


In [2]:
import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import root_mean_squared_error

In [3]:
import mlflow
# Run metadata is stored in a SQLite file next to the notebook, and every run
# started below is grouped under one named experiment.
TRACKING_URI = "sqlite:///mlflow.db"
EXPERIMENT_NAME = "nyc-taxi-experiment"

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the cell's last expression so the returned Experiment is displayed.
mlflow.set_experiment(EXPERIMENT_NAME)

2025/03/26 14:23:54 INFO mlflow.tracking.fluent: Experiment with name 'nyc-taxi-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='/Users/kushalramaiya/Documents/GitHub/mlops-zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1742979234730, experiment_id='1', last_update_time=1742979234730, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [11]:
def read_dataframe(filename):
    """Load a trip-record parquet file and prepare it for modelling.

    Steps performed:
      * parse ``lpep_pickup_datetime`` / ``lpep_dropoff_datetime`` as datetimes,
      * add a ``duration`` column holding the trip length in minutes,
      * keep only trips whose duration is between 1 and 60 minutes (inclusive),
      * cast ``PULocationID`` / ``DOLocationID`` to ``str`` so they can be
        used as categorical values.

    Args:
        filename: Path (or any source accepted by ``pd.read_parquet``) of the
            parquet file to load.

    Returns:
        A new ``pandas.DataFrame`` with the filtered rows and the extra
        ``duration`` column.
    """
    df = pd.read_parquet(filename)

    dropoff = pd.to_datetime(df['lpep_dropoff_datetime'])
    pickup = pd.to_datetime(df['lpep_pickup_datetime'])
    df['lpep_dropoff_datetime'] = dropoff
    df['lpep_pickup_datetime'] = pickup

    # Vectorised conversion to minutes instead of a per-row Python lambda.
    df['duration'] = (dropoff - pickup).dt.total_seconds() / 60

    # .copy() detaches the filtered rows from the unfiltered frame, so the
    # column assignment below (and any assignment callers make on the result)
    # writes to an independent frame rather than to a slice, which is what
    # triggers SettingWithCopyWarning.
    within_range = (df['duration'] >= 1) & (df['duration'] <= 60)
    df = df[within_range].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [13]:
# January 2021 is used for training, February 2021 for validation.
train_path = '../data/green_tripdata_2021-01.parquet'
val_path = '../data/green_tripdata_2021-02.parquet'

df_train = read_dataframe(train_path)
df_val = read_dataframe(val_path)

In [14]:
# Number of rows in the training and validation frames after loading.
df_train.shape[0], df_val.shape[0]

(73908, 61921)

In [15]:
# Combine pickup and drop-off zone IDs into one "pickup_dropoff" feature,
# applied the same way to both frames.
for frame in (df_train, df_val):
    frame['PU_DO'] = frame['PULocationID'] + '_' + frame['DOLocationID']

In [16]:
# Feature columns handed to the vectorizer.
categorical = ['PU_DO']
numerical = ['trip_distance']
feature_columns = categorical + numerical

dv = DictVectorizer()

# One dict per row; the vectorizer is fitted on the training rows only and
# then reused unchanged for the validation rows.
train_dicts = df_train[feature_columns].to_dict(orient='records')
val_dicts = df_val[feature_columns].to_dict(orient='records')

X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [17]:
# Regression target: the 'duration' column of each frame, as a NumPy array.
target = 'duration'
y_train = df_train[target].to_numpy()
y_val = df_val[target].to_numpy()

In [18]:
# Baseline: ordinary least squares fitted on the training matrix
# (fit() returns the estimator itself, so it can be chained).
lr = LinearRegression().fit(X_train, y_train)

y_pred = lr.predict(X_val)

# Validation RMSE, left as the last expression so the notebook displays it.
root_mean_squared_error(y_val, y_pred)

7.758714882773319

In [19]:
# Persist the fitted vectorizer together with the model so both can be
# restored from a single file.
# Create the output directory first: open() does not create missing parent
# directories and would raise FileNotFoundError on a fresh checkout.
os.makedirs('models', exist_ok=True)
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [22]:
# Track one Lasso experiment: tag, parameters, validation RMSE and an artifact.
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    # Log the files that are actually loaded into df_train / df_val
    # (parquet, not csv) so the recorded provenance matches the data used.
    mlflow.log_param("train-data-path", "../data/green_tripdata_2021-01.parquet")
    mlflow.log_param("valid-data-path", "../data/green_tripdata_2021-02.parquet")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    # NOTE: this rebinds `lr`, replacing the LinearRegression fitted earlier.
    lr = Lasso(alpha=alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # NOTE(review): this attaches the pickle written by the earlier cell
    # (vectorizer + LinearRegression), not the Lasso model fitted in this
    # run -- confirm that this is the intended artifact.
    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")

In [21]:
import xgboost as xgb

In [23]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [24]:
# Wrap the feature matrices and their targets in XGBoost's DMatrix container.
train = xgb.DMatrix(data=X_train, label=y_train)
valid = xgb.DMatrix(data=X_val, label=y_val)

In [25]:
def objective(params):
    """Train one XGBoost candidate and report its validation RMSE.

    Each call opens its own MLflow run, tags it with ``model=xgboost``, logs
    the sampled hyper-parameters, trains on the module-level ``train``
    DMatrix with early stopping monitored on ``valid``, and logs the
    resulting validation RMSE.

    Args:
        params: Dict of XGBoost training parameters (one sample drawn from
            the Hyperopt search space).

    Returns:
        Dict with ``loss`` (validation RMSE) and ``status`` (``STATUS_OK``),
        the shape Hyperopt's ``fmin`` expects from an objective.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        # xgb.train returns the booster as of the last round trained, and the
        # native predict() uses every tree by default -- including the rounds
        # trained after the best validation score. Limit prediction to the
        # best iteration so the reported loss is the early-stopped score.
        y_pred = booster.predict(
            valid,
            iteration_range=(0, booster.best_iteration + 1)
        )
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [26]:
# Hyperopt search space for the XGBoost parameters.
# hp.loguniform(label, low, high) samples exp(uniform(low, high)), so the
# bounds below are exponents, e.g. learning_rate lies in [exp(-3), exp(0)].
search_space = {
    # quniform yields floats; scope.int casts them to the integer XGBoost expects.
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the same
    # squared-error objective under its current name.
    'objective': 'reg:squarederror',
    'seed': 42
}

In [27]:
# Run 50 TPE-guided evaluations of `objective` over `search_space`.
# NOTE(review): no `rstate` is passed to fmin, so the sampled trials are
# presumably not reproducible between runs -- confirm whether that matters.
trials = Trials()
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials,
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.36698                           
[1]	validation-rmse:7.16821                           
[2]	validation-rmse:6.81895                           
[3]	validation-rmse:6.69015                           
[4]	validation-rmse:6.63527                           
[5]	validation-rmse:6.61467                           
[6]	validation-rmse:6.60266                           
[7]	validation-rmse:6.59577                           
[8]	validation-rmse:6.58836                           
[9]	validation-rmse:6.58510                           
[10]	validation-rmse:6.58060                          
[11]	validation-rmse:6.57833                          
[12]	validation-rmse:6.57292                          
[13]	validation-rmse:6.56905                          
[14]	validation-rmse:6.56319                          
[15]	validation-rmse:6.56169                          
[16]	validation-rmse:6.54928                          
[17]	validation-rmse:6.54611                          
[18]	valid

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:6.75361                                                    
[3]	validation-rmse:6.70913                                                    
[4]	validation-rmse:6.69191                                                    
[5]	validation-rmse:6.68397                                                    
[6]	validation-rmse:6.68084                                                    
[7]	validation-rmse:6.67710                                                    
[8]	validation-rmse:6.67125                                                    
[9]	validation-rmse:6.66652                                                    
[10]	validation-rmse:6.65617                                                   
[11]	validation-rmse:6.64864                                                   
[12]	validation-rmse:6.64616                                                   
[13]	validation-rmse:6.64289                                                   
[14]	validation-rmse:6.63949            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.41478                                                    
[2]	validation-rmse:7.65043                                                    
[3]	validation-rmse:7.24304                                                    
[4]	validation-rmse:7.02208                                                    
[5]	validation-rmse:6.90207                                                    
[6]	validation-rmse:6.83500                                                    
[7]	validation-rmse:6.78927                                                    
[8]	validation-rmse:6.76127                                                    
[9]	validation-rmse:6.74013                                                    
[10]	validation-rmse:6.72227                                                   
[11]	validation-rmse:6.70683                                                   
[12]	validation-rmse:6.69931                                                   
[13]	validation-rmse:6.69252            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:10.55823                                                   
[2]	validation-rmse:9.91090                                                    
[3]	validation-rmse:9.36287                                                    
[4]	validation-rmse:8.90217                                                    
[5]	validation-rmse:8.51742                                                    
[6]	validation-rmse:8.19659                                                    
[7]	validation-rmse:7.92995                                                    
[8]	validation-rmse:7.70884                                                    
[9]	validation-rmse:7.52644                                                    
[10]	validation-rmse:7.37630                                                   
[11]	validation-rmse:7.25207                                                   
[12]	validation-rmse:7.15011                                                   
[13]	validation-rmse:7.06363            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.35688                                                    
[1]	validation-rmse:7.92615                                                    
[2]	validation-rmse:7.24996                                                    
[3]	validation-rmse:6.93566                                                    
[4]	validation-rmse:6.77574                                                    
[5]	validation-rmse:6.69671                                                    
[6]	validation-rmse:6.64767                                                    
[7]	validation-rmse:6.62273                                                    
[8]	validation-rmse:6.60488                                                    
[9]	validation-rmse:6.59361                                                    
[10]	validation-rmse:6.58067                                                   
[11]	validation-rmse:6.57215                                                   
[12]	validation-rmse:6.56716            

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:6.71864                                                    
[4]	validation-rmse:6.71423                                                    
[5]	validation-rmse:6.70891                                                    
[6]	validation-rmse:6.70745                                                    
[7]	validation-rmse:6.70837                                                    
[8]	validation-rmse:6.70556                                                    
[9]	validation-rmse:6.70147                                                    
[10]	validation-rmse:6.69646                                                   
[11]	validation-rmse:6.69094                                                   
[12]	validation-rmse:6.68750                                                   
[13]	validation-rmse:6.68452                                                   
[14]	validation-rmse:6.68071                                                   
[15]	validation-rmse:6.67706            

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:7.35164                                                    
[3]	validation-rmse:7.03087                                                    
[4]	validation-rmse:6.87845                                                    
[5]	validation-rmse:6.79441                                                    
[6]	validation-rmse:6.75021                                                    
[7]	validation-rmse:6.72500                                                    
[8]	validation-rmse:6.70730                                                    
[9]	validation-rmse:6.69158                                                    
[10]	validation-rmse:6.68299                                                   
[11]	validation-rmse:6.67903                                                   
[12]	validation-rmse:6.67429                                                   
[13]	validation-rmse:6.67164                                                   
[14]	validation-rmse:6.67002            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.37258                                                    
[1]	validation-rmse:6.67793                                                    
[2]	validation-rmse:6.55914                                                    
[3]	validation-rmse:6.52898                                                    
[4]	validation-rmse:6.51218                                                    
[5]	validation-rmse:6.49908                                                    
[6]	validation-rmse:6.49096                                                    
[7]	validation-rmse:6.48383                                                    
[8]	validation-rmse:6.47695                                                    
[9]	validation-rmse:6.46878                                                    
[10]	validation-rmse:6.46373                                                   
[11]	validation-rmse:6.45457                                                   
[12]	validation-rmse:6.45078            

  self.starting_round = model.num_boosted_rounds()



[6]	validation-rmse:6.86417                                                    
[7]	validation-rmse:6.83090                                                    
[8]	validation-rmse:6.80523                                                    
[9]	validation-rmse:6.78996                                                    
[10]	validation-rmse:6.78094                                                   
[11]	validation-rmse:6.77830                                                   
[12]	validation-rmse:6.77017                                                   
[13]	validation-rmse:6.76650                                                   
[14]	validation-rmse:6.76146                                                   
[15]	validation-rmse:6.75862                                                   
[16]	validation-rmse:6.75702                                                   
[17]	validation-rmse:6.75494                                                   
[18]	validation-rmse:6.75275            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.73086                                                    
[2]	validation-rmse:6.67658                                                    
[3]	validation-rmse:6.65303                                                    
[4]	validation-rmse:6.64488                                                    
[5]	validation-rmse:6.63213                                                    
[6]	validation-rmse:6.62752                                                    
[7]	validation-rmse:6.62133                                                    
[8]	validation-rmse:6.61933                                                    
[9]	validation-rmse:6.61600                                                    
[10]	validation-rmse:6.61432                                                   
[11]	validation-rmse:6.61185                                                   
[12]	validation-rmse:6.60939                                                   
[13]	validation-rmse:6.60711            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.59305                                                     
[1]	validation-rmse:6.84585                                                     
[2]	validation-rmse:6.70104                                                     
[3]	validation-rmse:6.65781                                                     
[4]	validation-rmse:6.63795                                                     
[5]	validation-rmse:6.63411                                                     
[6]	validation-rmse:6.62493                                                     
[7]	validation-rmse:6.62168                                                     
[8]	validation-rmse:6.61821                                                     
[9]	validation-rmse:6.61467                                                     
[10]	validation-rmse:6.60854                                                    
[11]	validation-rmse:6.60471                                                    
[12]	validation-rmse:6.60137

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.71218                                                     
[2]	validation-rmse:7.13551                                                     
[3]	validation-rmse:6.89749                                                     
[4]	validation-rmse:6.78482                                                     
[5]	validation-rmse:6.73204                                                     
[6]	validation-rmse:6.70200                                                     
[7]	validation-rmse:6.67683                                                     
[8]	validation-rmse:6.66513                                                     
[9]	validation-rmse:6.65828                                                     
[10]	validation-rmse:6.65172                                                    
[11]	validation-rmse:6.64772                                                    
[12]	validation-rmse:6.64585                                                    
[13]	validation-rmse:6.64109

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:7.97199                                                     
[3]	validation-rmse:7.46518                                                     
[4]	validation-rmse:7.16083                                                     
[5]	validation-rmse:6.97653                                                     
[6]	validation-rmse:6.86446                                                     
[7]	validation-rmse:6.79368                                                     
[8]	validation-rmse:6.74758                                                     
[9]	validation-rmse:6.71810                                                     
[10]	validation-rmse:6.69707                                                    
[11]	validation-rmse:6.68164                                                    
[12]	validation-rmse:6.66969                                                    
[13]	validation-rmse:6.66127                                                    
[14]	validation-rmse:6.65445

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.95839                                                     
[2]	validation-rmse:6.73072                                                     
[3]	validation-rmse:6.65361                                                     
[4]	validation-rmse:6.61949                                                     
[5]	validation-rmse:6.60319                                                     
[6]	validation-rmse:6.59736                                                     
[7]	validation-rmse:6.58500                                                     
[8]	validation-rmse:6.58052                                                     
[9]	validation-rmse:6.57777                                                     
[10]	validation-rmse:6.57143                                                    
[11]	validation-rmse:6.56979                                                    
[12]	validation-rmse:6.56577                                                    
[13]	validation-rmse:6.56339

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.94405                                                     
[1]	validation-rmse:6.89133                                                     
[2]	validation-rmse:6.64073                                                     
[3]	validation-rmse:6.55772                                                     
[4]	validation-rmse:6.52605                                                     
[5]	validation-rmse:6.50057                                                     
[6]	validation-rmse:6.48660                                                     
[7]	validation-rmse:6.48017                                                     
[8]	validation-rmse:6.47519                                                     
[9]	validation-rmse:6.46673                                                     
[10]	validation-rmse:6.46364                                                    
[11]	validation-rmse:6.46064                                                    
[12]	validation-rmse:6.45952

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.61160                                                    
[1]	validation-rmse:9.43867                                                     
[2]	validation-rmse:8.59724                                                     
[3]	validation-rmse:8.00173                                                     
[4]	validation-rmse:7.58781                                                     
[5]	validation-rmse:7.29660                                                     
[6]	validation-rmse:7.09707                                                     
[7]	validation-rmse:6.95520                                                     
[8]	validation-rmse:6.85522                                                     
[9]	validation-rmse:6.77940                                                     
[10]	validation-rmse:6.72767                                                    
[11]	validation-rmse:6.68341                                                    
[12]	validation-rmse:6.65674

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:9.36851                                                     
[2]	validation-rmse:8.52579                                                     
[3]	validation-rmse:7.94679                                                     
[4]	validation-rmse:7.54636                                                     
[5]	validation-rmse:7.27018                                                     
[6]	validation-rmse:7.08582                                                     
[7]	validation-rmse:6.95410                                                     
[8]	validation-rmse:6.86639                                                     
[9]	validation-rmse:6.80477                                                     
[10]	validation-rmse:6.75991                                                    
[11]	validation-rmse:6.72871                                                    
[12]	validation-rmse:6.70717                                                    
[13]	validation-rmse:6.68910

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.05318                                                    
[1]	validation-rmse:10.11274                                                    
[2]	validation-rmse:9.36162                                                     
[3]	validation-rmse:8.75971                                                     
[4]	validation-rmse:8.27999                                                     
[5]	validation-rmse:7.91000                                                     
[6]	validation-rmse:7.61516                                                     
[7]	validation-rmse:7.38460                                                     
[8]	validation-rmse:7.21133                                                     
[9]	validation-rmse:7.07746                                                     
[10]	validation-rmse:6.97410                                                    
[11]	validation-rmse:6.88640                                                    
[12]	validation-rmse:6.82124

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.75033                                                     
[2]	validation-rmse:6.65230                                                     
[3]	validation-rmse:6.62833                                                     
[4]	validation-rmse:6.61019                                                     
[5]	validation-rmse:6.59725                                                     
[6]	validation-rmse:6.58941                                                     
[7]	validation-rmse:6.58530                                                     
[8]	validation-rmse:6.58018                                                     
[9]	validation-rmse:6.57790                                                     
[10]	validation-rmse:6.57435                                                    
[11]	validation-rmse:6.57183                                                    
[12]	validation-rmse:6.56930                                                    
[13]	validation-rmse:6.56715

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:10.42300                                                    
[5]	validation-rmse:10.13548                                                    
[6]	validation-rmse:9.86864                                                     
[7]	validation-rmse:9.62088                                                     
[8]	validation-rmse:9.39124                                                     
[9]	validation-rmse:9.17808                                                     
[10]	validation-rmse:8.98142                                                    
[11]	validation-rmse:8.79952                                                    
[12]	validation-rmse:8.63130                                                    
[13]	validation-rmse:8.47632                                                    
[14]	validation-rmse:8.33354                                                    
[15]	validation-rmse:8.20176                                                    
[16]	validation-rmse:8.08036

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.45122                                                    
[1]	validation-rmse:10.78245                                                    
[2]	validation-rmse:10.19696                                                    
[3]	validation-rmse:9.68786                                                     
[4]	validation-rmse:9.24347                                                     
[5]	validation-rmse:8.86006                                                     
[6]	validation-rmse:8.52900                                                     
[7]	validation-rmse:8.24472                                                     
[8]	validation-rmse:8.00233                                                     
[9]	validation-rmse:7.79266                                                     
[10]	validation-rmse:7.61443                                                    
[11]	validation-rmse:7.46001                                                    
[12]	validation-rmse:7.33082

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.68228                                                   
[1]	validation-rmse:11.19592                                                   
[2]	validation-rmse:10.75183                                                   
[3]	validation-rmse:10.34624                                                   
[4]	validation-rmse:9.97679                                                    
[5]	validation-rmse:9.64055                                                    
[6]	validation-rmse:9.33543                                                    
[7]	validation-rmse:9.05933                                                    
[8]	validation-rmse:8.80879                                                    
[9]	validation-rmse:8.58315                                                    
[10]	validation-rmse:8.37884                                                   
[11]	validation-rmse:8.19647                                                   
[12]	validation-rmse:8.03119            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.77923                                                   
[1]	validation-rmse:11.37495                                                   
[2]	validation-rmse:10.99928                                                   
[3]	validation-rmse:10.65069                                                   
[4]	validation-rmse:10.32602                                                   
[5]	validation-rmse:10.02601                                                   
[6]	validation-rmse:9.74712                                                    
[7]	validation-rmse:9.48927                                                    
[8]	validation-rmse:9.25112                                                    
[9]	validation-rmse:9.03175                                                    
[10]	validation-rmse:8.82915                                                   
[11]	validation-rmse:8.64333                                                   
[12]	validation-rmse:8.47216            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.63652                                                   
[1]	validation-rmse:11.11321                                                   
[2]	validation-rmse:10.63893                                                   
[3]	validation-rmse:10.21041                                                   
[4]	validation-rmse:9.82392                                                    
[5]	validation-rmse:9.47516                                                    
[6]	validation-rmse:9.16240                                                    
[7]	validation-rmse:8.88197                                                    
[8]	validation-rmse:8.62915                                                    
[9]	validation-rmse:8.40376                                                    
[10]	validation-rmse:8.20193                                                   
[11]	validation-rmse:8.02197                                                   
[12]	validation-rmse:7.86263            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:10.96296                                                   
[2]	validation-rmse:10.43734                                                   
[3]	validation-rmse:9.97070                                                    
[4]	validation-rmse:9.55688                                                    
[5]	validation-rmse:9.19123                                                    
[6]	validation-rmse:8.86921                                                    
[7]	validation-rmse:8.58382                                                    
[8]	validation-rmse:8.33338                                                    
[9]	validation-rmse:8.11616                                                    
[10]	validation-rmse:7.92384                                                   
[11]	validation-rmse:7.75733                                                   
[12]	validation-rmse:7.61088                                                   
[13]	validation-rmse:7.48352            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.36528                                                   
[1]	validation-rmse:10.63429                                                   
[2]	validation-rmse:10.00298                                                   
[3]	validation-rmse:9.46413                                                    
[4]	validation-rmse:9.00474                                                    
[5]	validation-rmse:8.61506                                                    
[6]	validation-rmse:8.28648                                                    
[7]	validation-rmse:8.00868                                                    
[8]	validation-rmse:7.77615                                                    
[9]	validation-rmse:7.58201                                                    
[10]	validation-rmse:7.41499                                                   
[11]	validation-rmse:7.27766                                                   
[12]	validation-rmse:7.16229            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.38280                                                   
[1]	validation-rmse:10.66408                                                   
[2]	validation-rmse:10.04210                                                   
[3]	validation-rmse:9.50752                                                    
[4]	validation-rmse:9.04956                                                    
[5]	validation-rmse:8.65894                                                    
[6]	validation-rmse:8.32744                                                    
[7]	validation-rmse:8.04557                                                    
[8]	validation-rmse:7.80940                                                    
[9]	validation-rmse:7.60807                                                    
[10]	validation-rmse:7.43981                                                   
[11]	validation-rmse:7.29841                                                   
[12]	validation-rmse:7.17882            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.05326                                                   
[1]	validation-rmse:10.11046                                                   
[2]	validation-rmse:9.35251                                                    
[3]	validation-rmse:8.74554                                                    
[4]	validation-rmse:8.26486                                                    
[5]	validation-rmse:7.88770                                                    
[6]	validation-rmse:7.59244                                                    
[7]	validation-rmse:7.36304                                                    
[8]	validation-rmse:7.18406                                                    
[9]	validation-rmse:7.04403                                                    
[10]	validation-rmse:6.93502                                                   
[11]	validation-rmse:6.84908                                                   
[12]	validation-rmse:6.78142            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.88638                                                   
[1]	validation-rmse:9.84713                                                    
[2]	validation-rmse:9.04014                                                    
[3]	validation-rmse:8.42115                                                    
[4]	validation-rmse:7.95256                                                    
[5]	validation-rmse:7.59833                                                    
[6]	validation-rmse:7.33402                                                    
[7]	validation-rmse:7.13618                                                    
[8]	validation-rmse:6.98484                                                    
[9]	validation-rmse:6.87238                                                    
[10]	validation-rmse:6.78687                                                   
[11]	validation-rmse:6.72082                                                   
[12]	validation-rmse:6.67036            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.37751                                                   
[1]	validation-rmse:10.65342                                                   
[2]	validation-rmse:10.02891                                                   
[3]	validation-rmse:9.49345                                                    
[4]	validation-rmse:9.03306                                                    
[5]	validation-rmse:8.64112                                                    
[6]	validation-rmse:8.30903                                                    
[7]	validation-rmse:8.02843                                                    
[8]	validation-rmse:7.79122                                                    
[9]	validation-rmse:7.59339                                                    
[10]	validation-rmse:7.42523                                                   
[11]	validation-rmse:7.28392                                                   
[12]	validation-rmse:7.16617            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.52027                                                   
[1]	validation-rmse:10.90574                                                   
[2]	validation-rmse:10.36157                                                   
[3]	validation-rmse:9.88213                                                    
[4]	validation-rmse:9.46225                                                    
[5]	validation-rmse:9.09036                                                    
[6]	validation-rmse:8.76681                                                    
[7]	validation-rmse:8.47644                                                    
[8]	validation-rmse:8.23216                                                    
[9]	validation-rmse:8.01147                                                    
[10]	validation-rmse:7.82526                                                   
[11]	validation-rmse:7.65987                                                   
[12]	validation-rmse:7.51997            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.21838                                                   
[1]	validation-rmse:10.38858                                                   
[2]	validation-rmse:9.69911                                                    
[3]	validation-rmse:9.12844                                                    
[4]	validation-rmse:8.66024                                                    
[5]	validation-rmse:8.27956                                                    
[6]	validation-rmse:7.96613                                                    
[7]	validation-rmse:7.71462                                                    
[8]	validation-rmse:7.51075                                                    
[9]	validation-rmse:7.34669                                                    
[10]	validation-rmse:7.21307                                                   
[11]	validation-rmse:7.10692                                                   
[12]	validation-rmse:7.01959            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.67870                                                   
[1]	validation-rmse:11.18937                                                   
[2]	validation-rmse:10.74182                                                   
[3]	validation-rmse:10.33369                                                   
[4]	validation-rmse:9.96171                                                    
[5]	validation-rmse:9.62278                                                    
[6]	validation-rmse:9.31558                                                    
[7]	validation-rmse:9.03631                                                    
[8]	validation-rmse:8.78366                                                    
[9]	validation-rmse:8.55585                                                    
[10]	validation-rmse:8.34937                                                   
[11]	validation-rmse:8.16244                                                   
[12]	validation-rmse:7.99475            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.81346                                                   
[1]	validation-rmse:9.73566                                                    
[2]	validation-rmse:8.91613                                                    
[3]	validation-rmse:8.30123                                                    
[4]	validation-rmse:7.84323                                                    
[5]	validation-rmse:7.50549                                                    
[6]	validation-rmse:7.25975                                                    
[7]	validation-rmse:7.07974                                                    
[8]	validation-rmse:6.94688                                                    
[9]	validation-rmse:6.84872                                                    
[10]	validation-rmse:6.77358                                                   
[11]	validation-rmse:6.72063                                                   
[12]	validation-rmse:6.67860            

  self.starting_round = model.num_boosted_rounds()



[12]	validation-rmse:6.83312                                                   
[13]	validation-rmse:6.82505                                                   
[14]	validation-rmse:6.81872                                                   
[15]	validation-rmse:6.81574                                                   
[16]	validation-rmse:6.81163                                                   
[17]	validation-rmse:6.80933                                                   
[18]	validation-rmse:6.80684                                                   
[19]	validation-rmse:6.80063                                                   
[20]	validation-rmse:6.79799                                                   
[21]	validation-rmse:6.79666                                                   
[22]	validation-rmse:6.79481                                                   
[23]	validation-rmse:6.79457                                                   
[24]	validation-rmse:6.79206            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.48378                                                   
[1]	validation-rmse:10.84308                                                   
[2]	validation-rmse:10.27868                                                   
[3]	validation-rmse:9.78306                                                    
[4]	validation-rmse:9.35150                                                    
[5]	validation-rmse:8.97626                                                    
[6]	validation-rmse:8.64947                                                    
[7]	validation-rmse:8.36337                                                    
[8]	validation-rmse:8.12169                                                    
[9]	validation-rmse:7.90599                                                    
[10]	validation-rmse:7.72276                                                   
[11]	validation-rmse:7.56818                                                   
[12]	validation-rmse:7.42927            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:10.35222                                                   
[2]	validation-rmse:9.64934                                                    
[3]	validation-rmse:9.06968                                                    
[4]	validation-rmse:8.59263                                                    
[5]	validation-rmse:8.20428                                                    
[6]	validation-rmse:7.89105                                                    
[7]	validation-rmse:7.63438                                                    
[8]	validation-rmse:7.42962                                                    
[9]	validation-rmse:7.26385                                                    
[10]	validation-rmse:7.12943                                                   
[11]	validation-rmse:7.02114                                                   
[12]	validation-rmse:6.93438                                                   
[13]	validation-rmse:6.86419            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.77879                                                   
[1]	validation-rmse:11.37570                                                   
[2]	validation-rmse:11.00140                                                   
[3]	validation-rmse:10.65294                                                   
[4]	validation-rmse:10.33165                                                   
[5]	validation-rmse:10.03259                                                   
[6]	validation-rmse:9.75530                                                    
[7]	validation-rmse:9.50127                                                    
[8]	validation-rmse:9.26631                                                    
[9]	validation-rmse:9.04694                                                    
[10]	validation-rmse:8.84791                                                   
[11]	validation-rmse:8.66263                                                   
[12]	validation-rmse:8.49301            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:11.27005                                                   
[2]	validation-rmse:10.85420                                                   
[3]	validation-rmse:10.47190                                                   
[4]	validation-rmse:10.12156                                                   
[5]	validation-rmse:9.80035                                                    
[6]	validation-rmse:9.50636                                                    
[7]	validation-rmse:9.23761                                                    
[8]	validation-rmse:8.99250                                                    
[9]	validation-rmse:8.76923                                                    
[10]	validation-rmse:8.56628                                                   
[11]	validation-rmse:8.38143                                                   
[12]	validation-rmse:8.21309                                                   
[13]	validation-rmse:8.06127            

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:9.31696                                                    
[5]	validation-rmse:8.94352                                                    
[6]	validation-rmse:8.62251                                                    
[7]	validation-rmse:8.34646                                                    
[8]	validation-rmse:8.10828                                                    
[9]	validation-rmse:7.90597                                                    
[10]	validation-rmse:7.73192                                                   
[11]	validation-rmse:7.58324                                                   
[12]	validation-rmse:7.45737                                                   
[13]	validation-rmse:7.34853                                                   
[14]	validation-rmse:7.25684                                                   
[15]	validation-rmse:7.17841                                                   
[16]	validation-rmse:7.11045            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.75326                                                    
[1]	validation-rmse:7.40113                                                    
[2]	validation-rmse:6.90869                                                    
[3]	validation-rmse:6.71946                                                    
[4]	validation-rmse:6.63608                                                    
[5]	validation-rmse:6.58933                                                    
[6]	validation-rmse:6.56413                                                    
[7]	validation-rmse:6.54905                                                    
[8]	validation-rmse:6.52986                                                    
[9]	validation-rmse:6.52363                                                    
[10]	validation-rmse:6.52021                                                   
[11]	validation-rmse:6.51672                                                   
[12]	validation-rmse:6.51439            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.22281                                                   
[1]	validation-rmse:10.39001                                                   
[2]	validation-rmse:9.69383                                                    
[3]	validation-rmse:9.11712                                                    
[4]	validation-rmse:8.63739                                                    
[5]	validation-rmse:8.24709                                                    
[6]	validation-rmse:7.92461                                                    
[7]	validation-rmse:7.66441                                                    
[8]	validation-rmse:7.45234                                                    
[9]	validation-rmse:7.27870                                                    
[10]	validation-rmse:7.13918                                                   
[11]	validation-rmse:7.02539                                                   
[12]	validation-rmse:6.93172            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.60941                                                   
[1]	validation-rmse:11.06627                                                   
[2]	validation-rmse:10.57622                                                   
[3]	validation-rmse:10.13520                                                   
[4]	validation-rmse:9.73983                                                    
[5]	validation-rmse:9.38736                                                    
[6]	validation-rmse:9.06920                                                    
[7]	validation-rmse:8.78916                                                    
[8]	validation-rmse:8.53835                                                    
[9]	validation-rmse:8.31606                                                    
[10]	validation-rmse:8.12240                                                   
[11]	validation-rmse:7.94714                                                   
[12]	validation-rmse:7.79374            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.19774                                                   
[1]	validation-rmse:8.85922                                                    
[2]	validation-rmse:7.98878                                                    
[3]	validation-rmse:7.44200                                                    
[4]	validation-rmse:7.09960                                                    
[5]	validation-rmse:6.88412                                                    
[6]	validation-rmse:6.74600                                                    
[7]	validation-rmse:6.65950                                                    
[8]	validation-rmse:6.60351                                                    
[9]	validation-rmse:6.56399                                                    
[10]	validation-rmse:6.53688                                                   
[11]	validation-rmse:6.51704                                                   
[12]	validation-rmse:6.50181            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.88991                                                    
[1]	validation-rmse:8.48109                                                    
[2]	validation-rmse:7.68153                                                    
[3]	validation-rmse:7.21654                                                    
[4]	validation-rmse:6.95740                                                    
[5]	validation-rmse:6.80500                                                    
[6]	validation-rmse:6.71918                                                    
[7]	validation-rmse:6.66391                                                    
[8]	validation-rmse:6.62412                                                    
[9]	validation-rmse:6.60136                                                    
[10]	validation-rmse:6.58600                                                   
[11]	validation-rmse:6.57499                                                   
[12]	validation-rmse:6.56534            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.67038                                                   
[1]	validation-rmse:9.51274                                                    
[2]	validation-rmse:8.65770                                                    
[3]	validation-rmse:8.03284                                                    
[4]	validation-rmse:7.58519                                                    
[5]	validation-rmse:7.26388                                                    
[6]	validation-rmse:7.03776                                                    
[7]	validation-rmse:6.87773                                                    
[8]	validation-rmse:6.76190                                                    
[9]	validation-rmse:6.67779                                                    
[10]	validation-rmse:6.61722                                                   
[11]	validation-rmse:6.57339                                                   
[12]	validation-rmse:6.53858            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.02083                                                   
[1]	validation-rmse:10.06029                                                   
[2]	validation-rmse:9.29296                                                    
[3]	validation-rmse:8.68485                                                    
[4]	validation-rmse:8.20926                                                    
[5]	validation-rmse:7.83806                                                    
[6]	validation-rmse:7.55077                                                    
[7]	validation-rmse:7.32886                                                    
[8]	validation-rmse:7.15800                                                    
[9]	validation-rmse:7.02520                                                    
[10]	validation-rmse:6.92243                                                   
[11]	validation-rmse:6.84274                                                   
[12]	validation-rmse:6.78046            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.32498                                                   
[1]	validation-rmse:10.56290                                                   
[2]	validation-rmse:9.91256                                                    
[3]	validation-rmse:9.36117                                                    
[4]	validation-rmse:8.89528                                                    
[5]	validation-rmse:8.50343                                                    
[6]	validation-rmse:8.17479                                                    
[7]	validation-rmse:7.89986                                                    
[8]	validation-rmse:7.67152                                                    
[9]	validation-rmse:7.48057                                                    
[10]	validation-rmse:7.32258                                                   
[11]	validation-rmse:7.19220                                                   
[12]	validation-rmse:7.08471            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.73647                                                   
[1]	validation-rmse:11.29582                                                   
[2]	validation-rmse:10.88883                                                   
[3]	validation-rmse:10.51364                                                   
[4]	validation-rmse:10.16764                                                   
[5]	validation-rmse:9.84952                                                    
[6]	validation-rmse:9.55760                                                    
[7]	validation-rmse:9.28982                                                    
[8]	validation-rmse:9.04403                                                    
[9]	validation-rmse:8.81957                                                    
[10]	validation-rmse:8.61411                                                   
[11]	validation-rmse:8.42666                                                   
[12]	validation-rmse:8.25559            

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:11.10334                                                   
[2]	validation-rmse:10.62770                                                   
[3]	validation-rmse:10.19937                                                   
[4]	validation-rmse:9.81342                                                    
[5]	validation-rmse:9.46677                                                    
[6]	validation-rmse:9.15688                                                    
[7]	validation-rmse:8.87982                                                    
[8]	validation-rmse:8.63329                                                    
[9]	validation-rmse:8.41476                                                    
[10]	validation-rmse:8.21777                                                   
[11]	validation-rmse:8.04402                                                   
[12]	validation-rmse:7.88918                                                   
[13]	validation-rmse:7.75214            

In [29]:
# Switch XGBoost autologging off: the following cell logs its params, metric,
# preprocessor artifact and model to MLflow explicitly.
mlflow.xgboost.autolog(disable=True)

In [30]:
# Train one XGBoost model with a fixed set of hyperparameters inside a single
# MLflow run, logging the params, the validation RMSE, the fitted
# DictVectorizer and the booster itself.
with mlflow.start_run():

    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    # NOTE(review): presumably the best trial from the earlier tuning runs --
    # confirm against the MLflow UI before reusing these values.
    best_params = {
        'learning_rate': 0.09435998655149878,
        'max_depth': 39,
        'min_child_weight': 1.9487322107872531,
        # 'reg:linear' is the deprecated alias of this objective; use the
        # current name so training and model saving stop emitting warnings.
        'objective': 'reg:squarederror',
        'reg_alpha': 0.021436135009241304,
        'reg_lambda': 0.3646085937929378,
        'seed': 42
    }

    mlflow.log_params(best_params)

    # Up to 1000 boosting rounds; stop once the validation metric has not
    # improved for 50 consecutive rounds.
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # Persist the fitted DictVectorizer next to the model so the same feature
    # encoding can be applied at inference time.
    # NOTE(review): assumes the local "models/" directory already exists.
    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

  self.starting_round = model.num_boosted_rounds()


[0]	validation-rmse:11.45122
[1]	validation-rmse:10.78245
[2]	validation-rmse:10.19696
[3]	validation-rmse:9.68786
[4]	validation-rmse:9.24347
[5]	validation-rmse:8.86006
[6]	validation-rmse:8.52900
[7]	validation-rmse:8.24472
[8]	validation-rmse:8.00233
[9]	validation-rmse:7.79266
[10]	validation-rmse:7.61443
[11]	validation-rmse:7.46001
[12]	validation-rmse:7.33082
[13]	validation-rmse:7.21730
[14]	validation-rmse:7.12232
[15]	validation-rmse:7.04024
[16]	validation-rmse:6.96957
[17]	validation-rmse:6.90881
[18]	validation-rmse:6.85612
[19]	validation-rmse:6.81068
[20]	validation-rmse:6.77278
[21]	validation-rmse:6.74001
[22]	validation-rmse:6.71105
[23]	validation-rmse:6.68502
[24]	validation-rmse:6.66253
[25]	validation-rmse:6.64297
[26]	validation-rmse:6.62585
[27]	validation-rmse:6.60973
[28]	validation-rmse:6.59652
[29]	validation-rmse:6.58444
[30]	validation-rmse:6.57295
[31]	validation-rmse:6.56361
[32]	validation-rmse:6.55444
[33]	validation-rmse:6.54625
[34]	validation-rmse:

  xgb_model.save_model(model_data_path)


In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

# Let MLflow capture estimator parameters and training info automatically for
# every scikit-learn model fitted below.
mlflow.sklearn.autolog()

# Fit each regressor with its default hyperparameters in its own MLflow run
# and record the validation RMSE.
for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        # The datasets are loaded from parquet files earlier in the notebook,
        # so log the paths with the matching extension.
        mlflow.log_param("train-data-path", "../data/green_tripdata_2021-01.parquet")
        mlflow.log_param("valid-data-path", "../data/green_tripdata_2021-02.parquet")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        # All four estimators are stochastic; pin the seed (same value as the
        # XGBoost run) so the logged metrics are reproducible across re-runs.
        mlmodel = model_class(random_state=42)
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)