In [1]:
import pandas as pd
import numpy as np
import datetime
import sklearn
import pickle

In [2]:
import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

2025/03/07 09:40:03 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/03/07 09:40:03 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

<Experiment: artifact_location='/workspaces/mlops-zoomcamp/04-deployment/batch/mlruns/1', creation_time=1741340404011, experiment_id='1', last_update_time=1741340404011, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [5]:
train_df = pd.read_csv("./data/green_tripdata_2024-01.csv")

In [4]:
train_df.head()

Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID,store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,Airport_fee
0,2,2024-01-01 00:57:55,2024-01-01 01:17:43,1.0,1.72,1.0,N,186,79,2,17.7,1.0,0.5,0.0,0.0,1.0,22.7,2.5,0.0
1,1,2024-01-01 00:03:00,2024-01-01 00:09:36,1.0,1.8,1.0,N,140,236,1,10.0,3.5,0.5,3.75,0.0,1.0,18.75,2.5,0.0
2,1,2024-01-01 00:17:06,2024-01-01 00:35:01,1.0,4.7,1.0,N,236,79,1,23.3,3.5,0.5,3.0,0.0,1.0,31.3,2.5,0.0
3,1,2024-01-01 00:36:38,2024-01-01 00:44:56,1.0,1.4,1.0,N,79,211,1,10.0,3.5,0.5,2.0,0.0,1.0,17.0,2.5,0.0
4,1,2024-01-01 00:46:51,2024-01-01 00:52:57,1.0,0.8,1.0,N,211,148,1,7.9,3.5,0.5,3.2,0.0,1.0,16.1,2.5,0.0


In [8]:
train_df.keys()

Index(['VendorID', 'lpep_pickup_datetime', 'lpep_dropoff_datetime',
       'store_and_fwd_flag', 'RatecodeID', 'PULocationID', 'DOLocationID',
       'passenger_count', 'trip_distance', 'fare_amount', 'extra', 'mta_tax',
       'tip_amount', 'tolls_amount', 'ehail_fee', 'improvement_surcharge',
       'total_amount', 'payment_type', 'trip_type', 'congestion_surcharge'],
      dtype='object')

In [9]:
train_df["duration"] = pd.to_datetime(train_df["lpep_dropoff_datetime"]) - pd.to_datetime(train_df["lpep_pickup_datetime"])
train_df["duration"] = train_df["duration"].dt.total_seconds() / 60

In [10]:
train_df = train_df[train_df["duration"] < 60]

In [11]:
train_df.loc[:, "PULocationID"] = train_df["PULocationID"].astype(str)
train_df.loc[:, "DOLocationID"] = train_df["DOLocationID"].astype(str)

  train_df.loc[:, "PULocationID"] = train_df["PULocationID"].astype(str)
  train_df.loc[:, "DOLocationID"] = train_df["DOLocationID"].astype(str)


In [12]:
feature_df = train_df[["PULocationID", "DOLocationID"]].drop_duplicates().reset_index(drop=True)

In [13]:
feature_df

Unnamed: 0,PULocationID,DOLocationID
0,236,239
1,65,170
2,74,262
3,74,116
4,74,243
...,...,...
5176,41,37
5177,52,227
5178,82,49
5179,159,242


In [14]:
feature_dicts = feature_df.to_dict('records')

In [15]:
feature_dicts

[{'PULocationID': '236', 'DOLocationID': '239'},
 {'PULocationID': '65', 'DOLocationID': '170'},
 {'PULocationID': '74', 'DOLocationID': '262'},
 {'PULocationID': '74', 'DOLocationID': '116'},
 {'PULocationID': '74', 'DOLocationID': '243'},
 {'PULocationID': '33', 'DOLocationID': '209'},
 {'PULocationID': '74', 'DOLocationID': '238'},
 {'PULocationID': '166', 'DOLocationID': '239'},
 {'PULocationID': '226', 'DOLocationID': '226'},
 {'PULocationID': '7', 'DOLocationID': '129'},
 {'PULocationID': '42', 'DOLocationID': '75'},
 {'PULocationID': '41', 'DOLocationID': '141'},
 {'PULocationID': '130', 'DOLocationID': '196'},
 {'PULocationID': '74', 'DOLocationID': '69'},
 {'PULocationID': '41', 'DOLocationID': '74'},
 {'PULocationID': '55', 'DOLocationID': '210'},
 {'PULocationID': '41', 'DOLocationID': '42'},
 {'PULocationID': '42', 'DOLocationID': '151'},
 {'PULocationID': '255', 'DOLocationID': '255'},
 {'PULocationID': '41', 'DOLocationID': '24'},
 {'PULocationID': '255', 'DOLocationID': 

In [16]:
from sklearn.feature_extraction import DictVectorizer

In [17]:
dv = DictVectorizer()

In [18]:
dv.fit_transform(feature_dicts).toarray()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(5181, 450))

In [19]:
dv.get_feature_names_out()

array(['DOLocationID=1', 'DOLocationID=10', 'DOLocationID=100',
       'DOLocationID=101', 'DOLocationID=102', 'DOLocationID=106',
       'DOLocationID=107', 'DOLocationID=108', 'DOLocationID=11',
       'DOLocationID=112', 'DOLocationID=113', 'DOLocationID=114',
       'DOLocationID=116', 'DOLocationID=117', 'DOLocationID=118',
       'DOLocationID=119', 'DOLocationID=12', 'DOLocationID=120',
       'DOLocationID=121', 'DOLocationID=122', 'DOLocationID=123',
       'DOLocationID=124', 'DOLocationID=125', 'DOLocationID=126',
       'DOLocationID=127', 'DOLocationID=128', 'DOLocationID=129',
       'DOLocationID=13', 'DOLocationID=130', 'DOLocationID=131',
       'DOLocationID=132', 'DOLocationID=133', 'DOLocationID=134',
       'DOLocationID=135', 'DOLocationID=136', 'DOLocationID=137',
       'DOLocationID=138', 'DOLocationID=139', 'DOLocationID=14',
       'DOLocationID=140', 'DOLocationID=141', 'DOLocationID=142',
       'DOLocationID=143', 'DOLocationID=144', 'DOLocationID=145',
  

520

Get feature matrix

In [20]:
feature_dicts = train_df[['PULocationID', 'DOLocationID']].to_dict(orient='records')

In [21]:
feature_matrix_train = dv.transform(feature_dicts)

# Training

In [22]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [23]:
duration = train_df["duration"].to_numpy()

In [24]:
lr = LinearRegression().fit(feature_matrix_train, duration)

In [28]:
# save model
with open('./models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [29]:
duration_pred = lr.predict(feature_matrix_train)
mse = mean_squared_error(duration, duration_pred)
rmse = np.sqrt(mse)
rmse

np.float64(7.281606147116313)

In [30]:
del train_df

import gc
gc.collect()

1588

In [31]:
val_df = pd.read_csv("./data/green_tripdata_2024-02.csv")

In [34]:
val_df["duration"] = pd.to_datetime(val_df["lpep_dropoff_datetime"]) - pd.to_datetime(val_df["lpep_pickup_datetime"])
val_df["duration"] = val_df["duration"].dt.total_seconds() / 60
val_df = val_df[val_df["duration"] < 60]

val_df.loc[:, "PULocationID"] = val_df["PULocationID"].astype(str)
val_df.loc[:, "DOLocationID"] = val_df["DOLocationID"].astype(str)

  val_df.loc[:, "PULocationID"] = val_df["PULocationID"].astype(str)
  val_df.loc[:, "DOLocationID"] = val_df["DOLocationID"].astype(str)


In [35]:
feature_dicts = val_df[['PULocationID', 'DOLocationID']].to_dict(orient='records')
feature_matrix_val = dv.transform(feature_dicts)

val_duration = val_df["duration"].to_numpy()
duration_pred = lr.predict(feature_matrix_val)
mse = mean_squared_error(val_duration, duration_pred)
rmse = np.sqrt(mse)
rmse

np.float64(7.458980178490645)

Train 2nd model

In [36]:
from sklearn.linear_model import Lasso

In [37]:
with mlflow.start_run():
    
    # setting tags for easier filtering
    mlflow.set_tag("developer", "katie")

    mlflow.log_param("train-data-path", "./data/green_tripdata_2024-01.csv")
    mlflow.log_param("valid-data-path", "./data/green_tripdata_2024-02.csv")
    
    alpha = 0.1
    
    mlflow.log_param("alpha", alpha)
    
    lr = Lasso(alpha)
    lr.fit(feature_matrix_train, duration)

    duration_pred = lr.predict(feature_matrix_val)
    mse = mean_squared_error(val_duration, duration_pred)
    rmse = np.sqrt(mse)

    mlflow.log_metric("rmse", rmse)

    # saving model as an artefact
    mlflow.sklearn.log_model(lr, artifact_path="models_mlflow")



## Logging hyper-parameters

In [39]:
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [41]:
train = xgb.DMatrix(feature_matrix_train, label=duration)
valid = xgb.DMatrix(feature_matrix_val, label=val_duration)

In [31]:
# setting objective function
def objective(params):

    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            # on training data
            dtrain=train,
            num_boost_round=100,
            # use validation set to control the optimisation algorithm
            # minimise error on validation set
            evals=[(valid, "validation")],
            # stop if no improvements for 50 rounds
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        mse = mean_squared_error(val_duration, y_pred)
        mlflow.log_metric("rmse", mse**0.5)

    return {'loss': rmse, 'status': STATUS_OK}

In [32]:
# search space - range for hyperopt to explore the parameters
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    # log uniform space
    'learning_rate': hp.loguniform('learning_rate', -3, 0),   # between exp(-3) and exp(0) - [0.05, 1]
    'reg_alpha': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    # this is a regression problem, so use regression as objective function
    'objective': 'reg:linear',
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|                                                                        | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:9.04911                                                                                         
[1]	validation-rmse:8.38270                                                                                         
[2]	validation-rmse:8.05224                                                                                         
[3]	validation-rmse:7.65542                                                                                         
[4]	validation-rmse:7.54594                                                                                         
[5]	validation-rmse:7.46376                                                                                         
[6]	validation-rmse:7.38224                                                                                         
[7]	validation-rmse:7.22001                                                                                         
[8]	validation-rmse:7.17593                                     




[0]	validation-rmse:6.80400                                                                                         
[1]	validation-rmse:6.10983                                                                                         
[2]	validation-rmse:5.74874                                                                                         
[3]	validation-rmse:5.70845                                                                                         
[4]	validation-rmse:5.67872                                                                                         
[5]	validation-rmse:5.64584                                                                                         
[6]	validation-rmse:5.61140                                                                                         
[7]	validation-rmse:5.60920                                                                                         
[8]	validation-rmse:5.60683                                     




[0]	validation-rmse:9.96004                                                                                         
[1]	validation-rmse:9.64997                                                                                         
[2]	validation-rmse:9.37628                                                                                         
[3]	validation-rmse:9.09685                                                                                         
[4]	validation-rmse:8.88022                                                                                         
[5]	validation-rmse:8.68255                                                                                         
[6]	validation-rmse:8.51469                                                                                         
[7]	validation-rmse:8.23184                                                                                         
[8]	validation-rmse:7.98382                                     




[0]	validation-rmse:9.03863                                                                                         
[1]	validation-rmse:8.47627                                                                                         
[2]	validation-rmse:8.19516                                                                                         
[3]	validation-rmse:8.04254                                                                                         
[4]	validation-rmse:7.80126                                                                                         
[5]	validation-rmse:7.72376                                                                                         
[6]	validation-rmse:7.64386                                                                                         
[7]	validation-rmse:7.59133                                                                                         
[8]	validation-rmse:7.54330                                     




[0]	validation-rmse:9.72025                                                                                         
[1]	validation-rmse:9.24082                                                                                         
[2]	validation-rmse:8.84856                                                                                         
[3]	validation-rmse:8.36990                                                                                         
[4]	validation-rmse:7.96113                                                                                         
[5]	validation-rmse:7.75285                                                                                         
[6]	validation-rmse:7.42756                                                                                         
[7]	validation-rmse:7.12920                                                                                         
[8]	validation-rmse:7.01325                                     




[0]	validation-rmse:9.99957                                                                                         
[1]	validation-rmse:9.72917                                                                                         
[2]	validation-rmse:9.48553                                                                                         
[3]	validation-rmse:9.25813                                                                                         
[4]	validation-rmse:9.04959                                                                                         
[5]	validation-rmse:8.85692                                                                                         
[6]	validation-rmse:8.68514                                                                                         
[7]	validation-rmse:8.53182                                                                                         
[8]	validation-rmse:8.38928                                     




[0]	validation-rmse:9.94180                                                                                         
[1]	validation-rmse:9.61987                                                                                         
[2]	validation-rmse:9.35810                                                                                         
[3]	validation-rmse:9.12055                                                                                         
[4]	validation-rmse:8.89951                                                                                         
[5]	validation-rmse:8.72193                                                                                         
[6]	validation-rmse:8.58519                                                                                         
[7]	validation-rmse:8.44507                                                                                         
[8]	validation-rmse:8.34370                                     




[0]	validation-rmse:9.61613                                                                                         
[1]	validation-rmse:9.07735                                                                                         
[2]	validation-rmse:8.65806                                                                                         
[3]	validation-rmse:8.07594                                                                                         
[4]	validation-rmse:7.60584                                                                                         
[5]	validation-rmse:7.39915                                                                                         
[6]	validation-rmse:7.09821                                                                                         
[7]	validation-rmse:6.97470                                                                                         
[8]	validation-rmse:6.87330                                     




[0]	validation-rmse:9.95895                                                                                         
[1]	validation-rmse:9.65602                                                                                         
[2]	validation-rmse:9.37975                                                                                         
[3]	validation-rmse:9.13130                                                                                         
[4]	validation-rmse:8.91021                                                                                         
[5]	validation-rmse:8.71422                                                                                         
[6]	validation-rmse:8.46981                                                                                         
[7]	validation-rmse:8.31111                                                                                         
[8]	validation-rmse:8.07884                                     




[0]	validation-rmse:9.44172                                                                                         
[1]	validation-rmse:8.80277                                                                                         
[2]	validation-rmse:8.34218                                                                                         
[3]	validation-rmse:7.69368                                                                                         
[4]	validation-rmse:7.21262                                                                                         
[5]	validation-rmse:7.00842                                                                                         
[6]	validation-rmse:6.64389                                                                                         
[7]	validation-rmse:6.43007                                                                                         
[8]	validation-rmse:6.34347                                     




[0]	validation-rmse:8.09513                                                                                         
[1]	validation-rmse:7.73184                                                                                         
[2]	validation-rmse:7.55520                                                                                         
[3]	validation-rmse:7.26790                                                                                         
[4]	validation-rmse:7.13983                                                                                         
[5]	validation-rmse:7.05035                                                                                         
[6]	validation-rmse:6.77183                                                                                         
[7]	validation-rmse:6.59830                                                                                         
[8]	validation-rmse:6.56389                                     




[0]	validation-rmse:9.14653                                                                                         
[1]	validation-rmse:8.38863                                                                                         
[2]	validation-rmse:7.89192                                                                                         
[3]	validation-rmse:7.18562                                                                                         
[4]	validation-rmse:6.72475                                                                                         
[5]	validation-rmse:6.54265                                                                                         
[6]	validation-rmse:6.27145                                                                                         
[7]	validation-rmse:6.18856                                                                                         
[8]	validation-rmse:5.98297                                     




[0]	validation-rmse:8.42796                                                                                         
[1]	validation-rmse:6.89518                                                                                         
[2]	validation-rmse:6.54928                                                                                         
[3]	validation-rmse:6.18188                                                                                         
[4]	validation-rmse:6.10111                                                                                         
[5]	validation-rmse:6.03182                                                                                         
[6]	validation-rmse:5.99343                                                                                         
[7]	validation-rmse:5.76051                                                                                         
[8]	validation-rmse:5.74585                                     




[0]	validation-rmse:9.85258                                                                                         
[1]	validation-rmse:9.47190                                                                                         
[2]	validation-rmse:9.16559                                                                                         
[3]	validation-rmse:8.91512                                                                                         
[4]	validation-rmse:8.72394                                                                                         
[5]	validation-rmse:8.56171                                                                                         
[6]	validation-rmse:8.37628                                                                                         
[7]	validation-rmse:8.27090                                                                                         
[8]	validation-rmse:8.18297                                     




[0]	validation-rmse:9.78296                                                                                         
[1]	validation-rmse:9.34662                                                                                         
[2]	validation-rmse:8.96440                                                                                         
[3]	validation-rmse:8.66882                                                                                         
[4]	validation-rmse:8.25639                                                                                         
[5]	validation-rmse:8.04599                                                                                         
[6]	validation-rmse:7.69825                                                                                         
[7]	validation-rmse:7.42534                                                                                         
[8]	validation-rmse:7.30883                                     




[0]	validation-rmse:9.60524                                                                                         
[1]	validation-rmse:9.02275                                                                                         
[2]	validation-rmse:8.59438                                                                                         
[3]	validation-rmse:8.14750                                                                                         
[4]	validation-rmse:7.88204                                                                                         
[5]	validation-rmse:7.42676                                                                                         
[6]	validation-rmse:7.27044                                                                                         
[7]	validation-rmse:6.93092                                                                                         
[8]	validation-rmse:6.73488                                     




[0]	validation-rmse:10.10539                                                                                        
[1]	validation-rmse:9.92163                                                                                         
[2]	validation-rmse:9.75045                                                                                         
[3]	validation-rmse:9.59539                                                                                         
[4]	validation-rmse:9.45243                                                                                         
[5]	validation-rmse:9.32525                                                                                         
[6]	validation-rmse:9.21190                                                                                         
[7]	validation-rmse:9.10201                                                                                         
[8]	validation-rmse:9.00392                                     




[0]	validation-rmse:10.01979                                                                                        
[1]	validation-rmse:9.75849                                                                                         
[2]	validation-rmse:9.52148                                                                                         
[3]	validation-rmse:9.30520                                                                                         
[4]	validation-rmse:9.10470                                                                                         
[5]	validation-rmse:8.92998                                                                                         
[6]	validation-rmse:8.70052                                                                                         
[7]	validation-rmse:8.49908                                                                                         
[8]	validation-rmse:8.33916                                     




[0]	validation-rmse:7.67138                                                                                         
[1]	validation-rmse:7.08735                                                                                         
[2]	validation-rmse:6.49049                                                                                         
[3]	validation-rmse:6.29231                                                                                         
[4]	validation-rmse:6.06824                                                                                         
[5]	validation-rmse:5.96788                                                                                         
[6]	validation-rmse:5.90557                                                                                         
[7]	validation-rmse:5.83738                                                                                         
[8]	validation-rmse:5.76042                                     




[0]	validation-rmse:8.77141                                                                                         
[1]	validation-rmse:8.25007                                                                                         
[2]	validation-rmse:8.02216                                                                                         
[3]	validation-rmse:7.87658                                                                                         
[4]	validation-rmse:7.56016                                                                                         
[5]	validation-rmse:7.47680                                                                                         
[6]	validation-rmse:7.40442                                                                                         
[7]	validation-rmse:7.31895                                                                                         
[8]	validation-rmse:7.27601                                     




[0]	validation-rmse:9.01054                                                                                         
[1]	validation-rmse:8.22649                                                                                         
[2]	validation-rmse:7.22906                                                                                         
[3]	validation-rmse:6.93050                                                                                         
[4]	validation-rmse:6.41685                                                                                         
[5]	validation-rmse:6.28846                                                                                         
[6]	validation-rmse:6.21211                                                                                         
[7]	validation-rmse:5.95345                                                                                         
[8]	validation-rmse:5.91535                                     




[0]	validation-rmse:9.00603                                                                                         
[1]	validation-rmse:8.20526                                                                                         
[2]	validation-rmse:7.35548                                                                                         
[3]	validation-rmse:7.04753                                                                                         
[4]	validation-rmse:6.84650                                                                                         
[5]	validation-rmse:6.36188                                                                                         
[6]	validation-rmse:6.27696                                                                                         
[7]	validation-rmse:6.21565                                                                                         
[8]	validation-rmse:5.94949                                     




[0]	validation-rmse:9.12998                                                                                         
[1]	validation-rmse:8.36269                                                                                         
[2]	validation-rmse:7.55933                                                                                         
[3]	validation-rmse:7.23024                                                                                         
[4]	validation-rmse:7.01998                                                                                         
[5]	validation-rmse:6.52590                                                                                         
[6]	validation-rmse:6.42576                                                                                         
[7]	validation-rmse:6.31051                                                                                         
[8]	validation-rmse:6.05701                                     




[0]	validation-rmse:8.12773                                                                                         
[1]	validation-rmse:7.45175                                                                                         
[2]	validation-rmse:7.07949                                                                                         
[3]	validation-rmse:6.92071                                                                                         
[4]	validation-rmse:6.34634                                                                                         
[5]	validation-rmse:6.29766                                                                                         
[6]	validation-rmse:6.16600                                                                                         
[7]	validation-rmse:6.13397                                                                                         
[8]	validation-rmse:6.09848                                     




[0]	validation-rmse:9.40377                                                                                         
[1]	validation-rmse:8.74698                                                                                         
[2]	validation-rmse:8.27388                                                                                         
[3]	validation-rmse:7.56996                                                                                         
[4]	validation-rmse:7.05698                                                                                         
[5]	validation-rmse:6.75032                                                                                         
[6]	validation-rmse:6.61110                                                                                         
[7]	validation-rmse:6.51446                                                                                         
[8]	validation-rmse:6.26358                                     




[0]	validation-rmse:8.83944                                                                                         
[1]	validation-rmse:8.00486                                                                                         
[2]	validation-rmse:7.51598                                                                                         
[3]	validation-rmse:6.70357                                                                                         
[4]	validation-rmse:6.31732                                                                                         
[5]	validation-rmse:6.21323                                                                                         
[6]	validation-rmse:6.14432                                                                                         
[7]	validation-rmse:5.88847                                                                                         
[8]	validation-rmse:5.85411                                     




[0]	validation-rmse:8.70209                                                                                         
[1]	validation-rmse:7.85945                                                                                         
[2]	validation-rmse:6.84995                                                                                         
[3]	validation-rmse:6.60541                                                                                         
[4]	validation-rmse:6.11907                                                                                         
[5]	validation-rmse:6.03196                                                                                         
[6]	validation-rmse:5.92138                                                                                         
[7]	validation-rmse:5.88200                                                                                         
[8]	validation-rmse:5.82958                                     




[0]	validation-rmse:9.03343                                                                                         
[1]	validation-rmse:8.32826                                                                                         
[2]	validation-rmse:7.62109                                                                                         
[3]	validation-rmse:7.41582                                                                                         
[4]	validation-rmse:7.18957                                                                                         
[5]	validation-rmse:7.10385                                                                                         
[6]	validation-rmse:6.94087                                                                                         
[7]	validation-rmse:6.75659                                                                                         
[8]	validation-rmse:6.71646                                     




[0]	validation-rmse:8.81615                                                                                         
[1]	validation-rmse:8.55168                                                                                         
[2]	validation-rmse:8.41199                                                                                         
[3]	validation-rmse:8.35860                                                                                         
[4]	validation-rmse:8.29535                                                                                         
[5]	validation-rmse:8.23685                                                                                         
[6]	validation-rmse:8.19185                                                                                         
[7]	validation-rmse:8.16330                                                                                         
[8]	validation-rmse:8.09257                                     




[0]	validation-rmse:8.64087                                                                                         
[1]	validation-rmse:7.78885                                                                                         
[2]	validation-rmse:7.30680                                                                                         
[3]	validation-rmse:6.48587                                                                                         
[4]	validation-rmse:6.36061                                                                                         
[5]	validation-rmse:5.99509                                                                                         
[6]	validation-rmse:5.95092                                                                                         
[7]	validation-rmse:5.77288                                                                                         
[8]	validation-rmse:5.75339                                     




[0]	validation-rmse:8.08827                                                                                         
[1]	validation-rmse:6.89567                                                                                         
[2]	validation-rmse:6.64608                                                                                         
[3]	validation-rmse:6.51927                                                                                         
[4]	validation-rmse:6.11568                                                                                         
[5]	validation-rmse:6.06614                                                                                         
[6]	validation-rmse:5.93687                                                                                         
[7]	validation-rmse:5.90948                                                                                         
[8]	validation-rmse:5.80238                                     




[0]	validation-rmse:7.52078                                                                                         
[1]	validation-rmse:6.03883                                                                                         
[2]	validation-rmse:5.88794                                                                                         
[3]	validation-rmse:5.76509                                                                                         
[4]	validation-rmse:5.70753                                                                                         
[5]	validation-rmse:5.59955                                                                                         
[6]	validation-rmse:5.58330                                                                                         
[7]	validation-rmse:5.56959                                                                                         
[8]	validation-rmse:5.56175                                     




[0]	validation-rmse:8.65810                                                                                         
[1]	validation-rmse:7.79277                                                                                         
[2]	validation-rmse:6.93233                                                                                         
[3]	validation-rmse:6.65702                                                                                         
[4]	validation-rmse:6.49350                                                                                         
[5]	validation-rmse:6.05779                                                                                         
[6]	validation-rmse:5.94211                                                                                         
[7]	validation-rmse:5.90231                                                                                         
[8]	validation-rmse:5.72773                                     




[0]	validation-rmse:8.04836                                                                                         
[1]	validation-rmse:6.66915                                                                                         
[2]	validation-rmse:6.30986                                                                                         
[3]	validation-rmse:5.92632                                                                                         
[4]	validation-rmse:5.84672                                                                                         
[5]	validation-rmse:5.79236                                                                                         
[6]	validation-rmse:5.71052                                                                                         
[7]	validation-rmse:5.68843                                                                                         
[8]	validation-rmse:5.67618                                     




[0]	validation-rmse:8.79324                                                                                         
[1]	validation-rmse:7.95435                                                                                         
[2]	validation-rmse:6.95992                                                                                         
[3]	validation-rmse:6.46540                                                                                         
[4]	validation-rmse:6.30936                                                                                         
[5]	validation-rmse:6.19121                                                                                         
[6]	validation-rmse:6.11477                                                                                         
[7]	validation-rmse:6.05526                                                                                         
[8]	validation-rmse:6.02351                                     




[0]	validation-rmse:9.79546                                                                                         
[1]	validation-rmse:9.37972                                                                                         
[2]	validation-rmse:9.01718                                                                                         
[3]	validation-rmse:8.70540                                                                                         
[4]	validation-rmse:8.44217                                                                                         
[5]	validation-rmse:8.22028                                                                                         
[6]	validation-rmse:7.88356                                                                                         
[7]	validation-rmse:7.61587                                                                                         
[8]	validation-rmse:7.47234                                     




[0]	validation-rmse:9.50505                                                                                         
[1]	validation-rmse:8.91802                                                                                         
[2]	validation-rmse:8.42217                                                                                         
[3]	validation-rmse:8.13027                                                                                         
[4]	validation-rmse:7.94197                                                                                         
[5]	validation-rmse:7.72956                                                                                         
[6]	validation-rmse:7.62676                                                                                         
[7]	validation-rmse:7.51504                                                                                         
[8]	validation-rmse:7.45714                                     




[0]	validation-rmse:8.62227                                                                                         
[1]	validation-rmse:7.53460                                                                                         
[2]	validation-rmse:7.12865                                                                                         
[3]	validation-rmse:6.86708                                                                                         
[4]	validation-rmse:6.74495                                                                                         
[5]	validation-rmse:6.14272                                                                                         
[6]	validation-rmse:6.09875                                                                                         
[7]	validation-rmse:5.89215                                                                                         
[8]	validation-rmse:5.87081                                     




[0]	validation-rmse:7.08363                                                                                         
[1]	validation-rmse:5.93949                                                                                         
[2]	validation-rmse:5.78697                                                                                         
[3]	validation-rmse:5.68328                                                                                         
[4]	validation-rmse:5.56545                                                                                         
[5]	validation-rmse:5.55321                                                                                         
[6]	validation-rmse:5.53795                                                                                         
[7]	validation-rmse:5.52560                                                                                         
[8]	validation-rmse:5.51018                                     




[0]	validation-rmse:9.56954                                                                                         
[1]	validation-rmse:9.01618                                                                                         
[2]	validation-rmse:8.54797                                                                                         
[3]	validation-rmse:8.04587                                                                                         
[4]	validation-rmse:7.79703                                                                                         
[5]	validation-rmse:7.42399                                                                                         
[6]	validation-rmse:7.28643                                                                                         
[7]	validation-rmse:7.16799                                                                                         
[8]	validation-rmse:6.87975                                     




[0]	validation-rmse:7.95534                                                                                         
[1]	validation-rmse:7.19504                                                                                         
[2]	validation-rmse:6.67325                                                                                         
[3]	validation-rmse:6.10541                                                                                         
[4]	validation-rmse:6.04338                                                                                         
[5]	validation-rmse:5.98791                                                                                         
[6]	validation-rmse:5.95576                                                                                         
[7]	validation-rmse:5.65849                                                                                         
[8]	validation-rmse:5.65017                                     




[0]	validation-rmse:10.04490                                                                                        
[1]	validation-rmse:9.81892                                                                                         
[2]	validation-rmse:9.62496                                                                                         
[3]	validation-rmse:9.45839                                                                                         
[4]	validation-rmse:9.31569                                                                                         
[5]	validation-rmse:9.19355                                                                                         
[6]	validation-rmse:9.08877                                                                                         
[7]	validation-rmse:8.99926                                                                                         
[8]	validation-rmse:8.92252                                     




[0]	validation-rmse:7.77322                                                                                         
[1]	validation-rmse:6.31836                                                                                         
[2]	validation-rmse:6.02985                                                                                         
[3]	validation-rmse:5.86861                                                                                         
[4]	validation-rmse:5.63895                                                                                         
[5]	validation-rmse:5.61510                                                                                         
[6]	validation-rmse:5.59957                                                                                         
[7]	validation-rmse:5.53283                                                                                         
[8]	validation-rmse:5.52810                                     




[0]	validation-rmse:8.07596                                                                                         
[1]	validation-rmse:7.26405                                                                                         
[2]	validation-rmse:6.53271                                                                                         
[3]	validation-rmse:6.39301                                                                                         
[4]	validation-rmse:6.25457                                                                                         
[5]	validation-rmse:5.98906                                                                                         
[6]	validation-rmse:5.93310                                                                                         
[7]	validation-rmse:5.89882                                                                                         
[8]	validation-rmse:5.63766                                     




[0]	validation-rmse:9.30242                                                                                         
[1]	validation-rmse:8.59850                                                                                         
[2]	validation-rmse:8.13825                                                                                         
[3]	validation-rmse:7.41152                                                                                         
[4]	validation-rmse:7.00069                                                                                         
[5]	validation-rmse:6.84293                                                                                         
[6]	validation-rmse:6.73733                                                                                         
[7]	validation-rmse:6.41239                                                                                         
[8]	validation-rmse:6.35538                                     




[0]	validation-rmse:7.45530                                                                                         
[1]	validation-rmse:5.97460                                                                                         
[2]	validation-rmse:5.82777                                                                                         
[3]	validation-rmse:5.72795                                                                                         
[4]	validation-rmse:5.65760                                                                                         
[5]	validation-rmse:5.60768                                                                                         
[6]	validation-rmse:5.54970                                                                                         
[7]	validation-rmse:5.54191                                                                                         
[8]	validation-rmse:5.53076                                     




[0]	validation-rmse:7.62544                                                                                         
[1]	validation-rmse:6.37795                                                                                         
[2]	validation-rmse:6.15055                                                                                         
[3]	validation-rmse:6.02714                                                                                         
[4]	validation-rmse:5.76873                                                                                         
[5]	validation-rmse:5.74031                                                                                         
[6]	validation-rmse:5.69161                                                                                         
[7]	validation-rmse:5.64818                                                                                         
[8]	validation-rmse:5.59507                                     




[0]	validation-rmse:9.78524                                                                                         
[1]	validation-rmse:9.35365                                                                                         
[2]	validation-rmse:9.00400                                                                                         
[3]	validation-rmse:8.73173                                                                                         
[4]	validation-rmse:8.52716                                                                                         
[5]	validation-rmse:8.25440                                                                                         
[6]	validation-rmse:8.12178                                                                                         
[7]	validation-rmse:7.94251                                                                                         
[8]	validation-rmse:7.85005                                     




[0]	validation-rmse:8.84179                                                                                         
[1]	validation-rmse:7.71525                                                                                         
[2]	validation-rmse:7.35061                                                                                         
[3]	validation-rmse:7.14088                                                                                         
[4]	validation-rmse:6.69458                                                                                         
[5]	validation-rmse:6.60813                                                                                         
[6]	validation-rmse:6.55449                                                                                         
[7]	validation-rmse:6.48450                                                                                         
[8]	validation-rmse:6.34333                                     




[0]	validation-rmse:7.55696                                                                                         
[1]	validation-rmse:6.53377                                                                                         
[2]	validation-rmse:6.27117                                                                                         
[3]	validation-rmse:6.12650                                                                                         
[4]	validation-rmse:6.03520                                                                                         
[5]	validation-rmse:5.83268                                                                                         
[6]	validation-rmse:5.80378                                                                                         
[7]	validation-rmse:5.64389                                                                                         
[8]	validation-rmse:5.62675                                     

In [42]:
# trained params
params = {
    'max_depth': 87,
    'learning_rate': 0.29122519299691013, 
    'reg_alpha': 0.00716725908061934,
    'min_child_weight': 1.1251068416171426,
    'objective': 'reg:linear',
    'seed': 42
}

# using autolog
mlflow.xgboost.autolog()
booster = xgb.train(
    params=params,
    dtrain=train,
    num_boost_round=1000,
    evals=[(valid, "validation")],
    early_stopping_rounds=50
)

2025/03/07 09:52:24 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '76688969aea845f1816ffb954e2b1398', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


[0]	validation-rmse:7.97210
[1]	validation-rmse:7.19983




[2]	validation-rmse:6.78191
[3]	validation-rmse:6.56674
[4]	validation-rmse:6.30874
[5]	validation-rmse:6.18206
[6]	validation-rmse:6.12972
[7]	validation-rmse:6.08824
[8]	validation-rmse:6.04158
[9]	validation-rmse:6.02443
[10]	validation-rmse:6.01411
[11]	validation-rmse:5.99181
[12]	validation-rmse:5.97848
[13]	validation-rmse:5.96942
[14]	validation-rmse:5.95672
[15]	validation-rmse:5.95058
[16]	validation-rmse:5.93637
[17]	validation-rmse:5.92488
[18]	validation-rmse:5.92182
[19]	validation-rmse:5.91965
[20]	validation-rmse:5.90720
[21]	validation-rmse:5.90627
[22]	validation-rmse:5.90532
[23]	validation-rmse:5.89990
[24]	validation-rmse:5.89798
[25]	validation-rmse:5.89607
[26]	validation-rmse:5.89560
[27]	validation-rmse:5.89539
[28]	validation-rmse:5.89435
[29]	validation-rmse:5.89207
[30]	validation-rmse:5.89316
[31]	validation-rmse:5.89011
[32]	validation-rmse:5.88905
[33]	validation-rmse:5.88641
[34]	validation-rmse:5.88488
[35]	validation-rmse:5.88275
[36]	validation-rmse:5

KeyboardInterrupt: 

## Saving models

In [43]:
with mlflow.start_run():
    best_params = {
    'max_depth': 87,
    'learning_rate': 0.29122519299691013, 
    'reg_alpha': 0.00716725908061934,
    'min_child_weight': 1.1251068416171426,
    'objective': 'reg:linear',
    'seed': 42
    }

    mlflow.log_params(best_params)

    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=300,
        evals=[(valid, "validation")],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    mse = mean_squared_error(val_duration, y_pred)
    mlflow.log_metric("rmse", mse**0.5)

    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
        
    # log preprocessor
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

[0]	validation-rmse:7.97210
[1]	validation-rmse:7.19983




[2]	validation-rmse:6.78191
[3]	validation-rmse:6.56674
[4]	validation-rmse:6.30874
[5]	validation-rmse:6.18206
[6]	validation-rmse:6.12972
[7]	validation-rmse:6.08824
[8]	validation-rmse:6.04158
[9]	validation-rmse:6.02443
[10]	validation-rmse:6.01411
[11]	validation-rmse:5.99181
[12]	validation-rmse:5.97848
[13]	validation-rmse:5.96942
[14]	validation-rmse:5.95672
[15]	validation-rmse:5.95058
[16]	validation-rmse:5.93637
[17]	validation-rmse:5.92488
[18]	validation-rmse:5.92182
[19]	validation-rmse:5.91965
[20]	validation-rmse:5.90720
[21]	validation-rmse:5.90627
[22]	validation-rmse:5.90532
[23]	validation-rmse:5.89990
[24]	validation-rmse:5.89798
[25]	validation-rmse:5.89607
[26]	validation-rmse:5.89560
[27]	validation-rmse:5.89539
[28]	validation-rmse:5.89435
[29]	validation-rmse:5.89207
[30]	validation-rmse:5.89316
[31]	validation-rmse:5.89011
[32]	validation-rmse:5.88905
[33]	validation-rmse:5.88641
[34]	validation-rmse:5.88488
[35]	validation-rmse:5.88275
[36]	validation-rmse:5



## Loading model

In [32]:
logged_model = 'runs:/7ced78e294ba48c9904625c91346cb01/models_mlflow'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [33]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: 7ced78e294ba48c9904625c91346cb01

In [34]:
xgboost_model = mlflow.xgboost.load_model(logged_model)

In [35]:
xgboost_model

<xgboost.core.Booster at 0x7b7439494c30>