In [2]:
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_error

mlflow ui --backend-store-uri sqlite:///mlflow.db

In [3]:
import os

In [23]:
import mlflow


mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location='/workspaces/mlops/02-experiment_tracking/mlruns/1', creation_time=1749562952052, experiment_id='1', last_update_time=1749562952052, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [24]:
mlflow.end_run()

In [5]:
mlflow.get_artifact_uri()

'/workspaces/mlops/02-experiment_tracking/mlruns/1/4ce6d8aa93ed416ab2dfd884fbe57b10/artifacts'

In [4]:
!python -V

Python 3.13.2


In [7]:
os.getcwd()

'/workspaces/mlops/02-experiment_tracking'

In [6]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

In [7]:
ls ..

[0m[34;42m01-intro[0m/  [34;42m02-experiment_tracking[0m/  LICENSE  README.md  [34;42mdata[0m/  mlflow.db


In [10]:
!ls ../data

green_tripdata_2021-01.parquet	green_tripdata_2021-02.parquet


In [8]:
df_train = read_dataframe('../data/green_tripdata_2021-01.parquet')
df_val = read_dataframe('../data/green_tripdata_2021-02.parquet')

In [9]:
len(df_train), len(df_val)

(73908, 61921)

In [10]:
df_train.dtypes

VendorID                          int64
lpep_pickup_datetime     datetime64[us]
lpep_dropoff_datetime    datetime64[us]
store_and_fwd_flag               object
RatecodeID                      float64
PULocationID                     object
DOLocationID                     object
passenger_count                 float64
trip_distance                   float64
fare_amount                     float64
extra                           float64
mta_tax                         float64
tip_amount                      float64
tolls_amount                    float64
ehail_fee                        object
improvement_surcharge           float64
total_amount                    float64
payment_type                    float64
trip_type                       float64
congestion_surcharge            float64
duration                        float64
dtype: object

In [11]:
df_train.iloc[0]

VendorID                                   2
lpep_pickup_datetime     2021-01-01 00:15:56
lpep_dropoff_datetime    2021-01-01 00:19:52
store_and_fwd_flag                         N
RatecodeID                               1.0
PULocationID                              43
DOLocationID                             151
passenger_count                          1.0
trip_distance                           1.01
fare_amount                              5.5
extra                                    0.5
mta_tax                                  0.5
tip_amount                               0.0
tolls_amount                             0.0
ehail_fee                               None
improvement_surcharge                    0.3
total_amount                             6.8
payment_type                             2.0
trip_type                                1.0
congestion_surcharge                     0.0
duration                            3.933333
Name: 0, dtype: object

In [12]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [13]:
categorical = ['PU_DO'] #'PULocationID', 'DOLocationID']
numerical = ['trip_distance']

dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [14]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [15]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

root_mean_squared_error(y_val, y_pred)

7.758715208009878

In [15]:
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [None]:

# mlflow.end_run()

In [16]:
with mlflow.start_run():

    mlflow.set_tag("developer", "sharaf")

    mlflow.log_param("train-data-path", "../data/green_tripdata_2021-01.csv")
    mlflow.log_param("valid-data-path", "../data/green_tripdata_2021-02.csv")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)
  
    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")

In [16]:
dv.feature_names_

['PU_DO=100_168',
 'PU_DO=100_180',
 'PU_DO=100_190',
 'PU_DO=100_225',
 'PU_DO=100_235',
 'PU_DO=100_42',
 'PU_DO=100_49',
 'PU_DO=100_69',
 'PU_DO=100_82',
 'PU_DO=100_89',
 'PU_DO=101_101',
 'PU_DO=101_13',
 'PU_DO=101_135',
 'PU_DO=101_139',
 'PU_DO=101_16',
 'PU_DO=101_175',
 'PU_DO=101_188',
 'PU_DO=101_19',
 'PU_DO=101_191',
 'PU_DO=101_196',
 'PU_DO=101_205',
 'PU_DO=101_218',
 'PU_DO=101_219',
 'PU_DO=101_226',
 'PU_DO=101_260',
 'PU_DO=101_265',
 'PU_DO=101_64',
 'PU_DO=101_7',
 'PU_DO=101_75',
 'PU_DO=101_86',
 'PU_DO=101_95',
 'PU_DO=102_102',
 'PU_DO=102_117',
 'PU_DO=102_121',
 'PU_DO=102_130',
 'PU_DO=102_131',
 'PU_DO=102_134',
 'PU_DO=102_139',
 'PU_DO=102_145',
 'PU_DO=102_16',
 'PU_DO=102_160',
 'PU_DO=102_164',
 'PU_DO=102_182',
 'PU_DO=102_196',
 'PU_DO=102_197',
 'PU_DO=102_198',
 'PU_DO=102_205',
 'PU_DO=102_21',
 'PU_DO=102_219',
 'PU_DO=102_235',
 'PU_DO=102_236',
 'PU_DO=102_242',
 'PU_DO=102_249',
 'PU_DO=102_258',
 'PU_DO=102_263',
 'PU_DO=102_28',
 'PU_DO=1

In [17]:
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [19]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [20]:
train, valid

(<xgboost.core.DMatrix at 0x76cc90848cd0>,
 <xgboost.core.DMatrix at 0x76cca7b110f0>)

In [21]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [None]:
# search_space = {
#     'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
#     'learning_rate': hp.loguniform('learning_rate', -3, 0),
#     'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
#     'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
#     'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
#     'objective': 'reg:linear',
#     'seed': 42
# }

# best_result = fmin(
#     fn=objective,
#     space=search_space,
#     algo=tpe.suggest,
#     max_evals=50,
#     trials=Trials()
# )

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.40043                           
[1]	validation-rmse:7.19202                           
[2]	validation-rmse:6.83083                           
[3]	validation-rmse:6.70396                           
[4]	validation-rmse:6.65497                           
[5]	validation-rmse:6.62963                           
[6]	validation-rmse:6.61334                           
[7]	validation-rmse:6.59757                           
[8]	validation-rmse:6.59347                           
[9]	validation-rmse:6.58977                           
[10]	validation-rmse:6.58764                          
[11]	validation-rmse:6.58465                          
[12]	validation-rmse:6.58262                          
[13]	validation-rmse:6.57349                          
[14]	validation-rmse:6.57114                          
[15]	validation-rmse:6.57004                          
[16]	validation-rmse:6.56834                          
[17]	validation-rmse:6.56679                          
[18]	valid

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.66341                                                      
[1]	validation-rmse:7.27266                                                      
[2]	validation-rmse:6.77562                                                      
[3]	validation-rmse:6.58587                                                      
[4]	validation-rmse:6.51014                                                      
[5]	validation-rmse:6.46716                                                      
[6]	validation-rmse:6.44463                                                      
[7]	validation-rmse:6.43295                                                      
[8]	validation-rmse:6.42797                                                      
[9]	validation-rmse:6.42215                                                      
[10]	validation-rmse:6.41860                                                     
[11]	validation-rmse:6.41338                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.83321                                                      
[1]	validation-rmse:6.62406                                                      
[2]	validation-rmse:6.60531                                                      
[3]	validation-rmse:6.59521                                                      
[4]	validation-rmse:6.58117                                                      
[5]	validation-rmse:6.57317                                                      
[6]	validation-rmse:6.56129                                                      
[7]	validation-rmse:6.55553                                                      
[8]	validation-rmse:6.54884                                                      
[9]	validation-rmse:6.53832                                                      
[10]	validation-rmse:6.52879                                                     
[11]	validation-rmse:6.52288                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.75244                                                   
[1]	validation-rmse:9.64220                                                    
[2]	validation-rmse:8.80619                                                    
[3]	validation-rmse:8.19001                                                    
[4]	validation-rmse:7.74106                                                    
[5]	validation-rmse:7.41452                                                    
[6]	validation-rmse:7.18137                                                    
[7]	validation-rmse:7.00944                                                    
[8]	validation-rmse:6.88265                                                    
[9]	validation-rmse:6.79107                                                    
[10]	validation-rmse:6.72101                                                   
[11]	validation-rmse:6.66957                                                   
[12]	validation-rmse:6.62777            

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.87612                                                     
[1]	validation-rmse:9.83696                                                      
[2]	validation-rmse:9.03739                                                      
[3]	validation-rmse:8.43099                                                      
[4]	validation-rmse:7.97520                                                      
[5]	validation-rmse:7.63688                                                      
[6]	validation-rmse:7.38393                                                      
[7]	validation-rmse:7.19729                                                      
[8]	validation-rmse:7.05690                                                      
[9]	validation-rmse:6.95240                                                      
[10]	validation-rmse:6.87424                                                     
[11]	validation-rmse:6.81372                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.44332                                                      
[1]	validation-rmse:10.77001                                                      
[2]	validation-rmse:10.18343                                                      
[3]	validation-rmse:9.67368                                                       
[4]	validation-rmse:9.23304                                                       
[5]	validation-rmse:8.85280                                                       
[6]	validation-rmse:8.52712                                                       
[7]	validation-rmse:8.24769                                                       
[8]	validation-rmse:8.00941                                                       
[9]	validation-rmse:7.80650                                                       
[10]	validation-rmse:7.63394                                                      
[11]	validation-rmse:7.48679                                                      
[12]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.16585                                                       
[1]	validation-rmse:7.74674                                                       
[2]	validation-rmse:7.12442                                                       
[3]	validation-rmse:6.83604                                                       
[4]	validation-rmse:6.70588                                                       
[5]	validation-rmse:6.63251                                                       
[6]	validation-rmse:6.59208                                                       
[7]	validation-rmse:6.57176                                                       
[8]	validation-rmse:6.55951                                                       
[9]	validation-rmse:6.54926                                                       
[10]	validation-rmse:6.54327                                                      
[11]	validation-rmse:6.53847                                                      
[12]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.99470                                                       
[1]	validation-rmse:7.54819                                                       
[2]	validation-rmse:6.94952                                                       
[3]	validation-rmse:6.69847                                                       
[4]	validation-rmse:6.57952                                                       
[5]	validation-rmse:6.52597                                                       
[6]	validation-rmse:6.49235                                                       
[7]	validation-rmse:6.47163                                                       
[8]	validation-rmse:6.45091                                                       
[9]	validation-rmse:6.44298                                                       
[10]	validation-rmse:6.43715                                                      
[11]	validation-rmse:6.43238                                                      
[12]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.67359                                                       
[1]	validation-rmse:7.29211                                                       
[2]	validation-rmse:6.79552                                                       
[3]	validation-rmse:6.60728                                                       
[4]	validation-rmse:6.52632                                                       
[5]	validation-rmse:6.49003                                                       
[6]	validation-rmse:6.46097                                                       
[7]	validation-rmse:6.44935                                                       
[8]	validation-rmse:6.43949                                                       
[9]	validation-rmse:6.43667                                                       
[10]	validation-rmse:6.43235                                                      
[11]	validation-rmse:6.42629                                                      
[12]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.17867                                                      
[1]	validation-rmse:8.84257                                                       
[2]	validation-rmse:7.98711                                                       
[3]	validation-rmse:7.45544                                                       
[4]	validation-rmse:7.12439                                                       
[5]	validation-rmse:6.91856                                                       
[6]	validation-rmse:6.79317                                                       
[7]	validation-rmse:6.71153                                                       
[8]	validation-rmse:6.65636                                                       
[9]	validation-rmse:6.61902                                                       
[10]	validation-rmse:6.59260                                                      
[11]	validation-rmse:6.57288                                                      
[12]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.07747                                                       
[1]	validation-rmse:10.14748                                                       
[2]	validation-rmse:9.39512                                                        
[3]	validation-rmse:8.78737                                                        
[4]	validation-rmse:8.30353                                                        
[5]	validation-rmse:7.92026                                                        
[6]	validation-rmse:7.61712                                                        
[7]	validation-rmse:7.37910                                                        
[8]	validation-rmse:7.19156                                                        
[9]	validation-rmse:7.04281                                                        
[10]	validation-rmse:6.92569                                                       
[11]	validation-rmse:6.83334                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.61418                                                       
[1]	validation-rmse:9.43654                                                        
[2]	validation-rmse:8.58384                                                        
[3]	validation-rmse:7.97728                                                        
[4]	validation-rmse:7.55257                                                        
[5]	validation-rmse:7.25287                                                        
[6]	validation-rmse:7.04481                                                        
[7]	validation-rmse:6.89749                                                        
[8]	validation-rmse:6.79169                                                        
[9]	validation-rmse:6.71709                                                        
[10]	validation-rmse:6.66172                                                       
[11]	validation-rmse:6.62204                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.35731                                                       
[1]	validation-rmse:10.61976                                                       
[2]	validation-rmse:9.98805                                                        
[3]	validation-rmse:9.44934                                                        
[4]	validation-rmse:8.99160                                                        
[5]	validation-rmse:8.60490                                                        
[6]	validation-rmse:8.27771                                                        
[7]	validation-rmse:8.00255                                                        
[8]	validation-rmse:7.77171                                                        
[9]	validation-rmse:7.57926                                                        
[10]	validation-rmse:7.41834                                                       
[11]	validation-rmse:7.28385                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.35826                                                       
[1]	validation-rmse:10.62523                                                       
[2]	validation-rmse:9.99260                                                        
[3]	validation-rmse:9.45790                                                        
[4]	validation-rmse:9.00043                                                        
[5]	validation-rmse:8.61845                                                        
[6]	validation-rmse:8.29406                                                        
[7]	validation-rmse:8.02028                                                        
[8]	validation-rmse:7.79064                                                        
[9]	validation-rmse:7.60228                                                        
[10]	validation-rmse:7.43984                                                       
[11]	validation-rmse:7.30893                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.57086                                                       
[1]	validation-rmse:10.99513                                                       
[2]	validation-rmse:10.48094                                                       
[3]	validation-rmse:10.02263                                                       
[4]	validation-rmse:9.61465                                                        
[5]	validation-rmse:9.25271                                                        
[6]	validation-rmse:8.93329                                                        
[7]	validation-rmse:8.65153                                                        
[8]	validation-rmse:8.40340                                                        
[9]	validation-rmse:8.18487                                                        
[10]	validation-rmse:7.99330                                                       
[11]	validation-rmse:7.82581                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.97481                                                        
[1]	validation-rmse:6.58558                                                        
[2]	validation-rmse:6.53429                                                        
[3]	validation-rmse:6.52069                                                        
[4]	validation-rmse:6.51206                                                        
[5]	validation-rmse:6.50237                                                        
[6]	validation-rmse:6.49219                                                        
[7]	validation-rmse:6.48310                                                        
[8]	validation-rmse:6.47661                                                        
[9]	validation-rmse:6.47304                                                        
[10]	validation-rmse:6.46732                                                       
[11]	validation-rmse:6.45959                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.65611                                                       
[1]	validation-rmse:11.14929                                                       
[2]	validation-rmse:10.69140                                                       
[3]	validation-rmse:10.27178                                                       
[4]	validation-rmse:9.89317                                                        
[5]	validation-rmse:9.55167                                                        
[6]	validation-rmse:9.24328                                                        
[7]	validation-rmse:8.96571                                                        
[8]	validation-rmse:8.71461                                                        
[9]	validation-rmse:8.49019                                                        
[10]	validation-rmse:8.28731                                                       
[11]	validation-rmse:8.10753                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.90062                                                        
[1]	validation-rmse:6.74763                                                        
[2]	validation-rmse:6.72836                                                        
[3]	validation-rmse:6.72146                                                        
[4]	validation-rmse:6.71373                                                        
[5]	validation-rmse:6.71067                                                        
[6]	validation-rmse:6.70273                                                        
[7]	validation-rmse:6.69377                                                        
[8]	validation-rmse:6.68801                                                        
[9]	validation-rmse:6.68521                                                        
[10]	validation-rmse:6.68432                                                       
[11]	validation-rmse:6.68724                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.28646                                                        
[1]	validation-rmse:6.77598                                                        
[2]	validation-rmse:6.69778                                                        
[3]	validation-rmse:6.68659                                                        
[4]	validation-rmse:6.67903                                                        
[5]	validation-rmse:6.67057                                                        
[6]	validation-rmse:6.66358                                                        
[7]	validation-rmse:6.65407                                                        
[8]	validation-rmse:6.64047                                                        
[9]	validation-rmse:6.63837                                                        
[10]	validation-rmse:6.63460                                                       
[11]	validation-rmse:6.63181                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.63621                                                       
[1]	validation-rmse:11.11290                                                       
[2]	validation-rmse:10.63894                                                       
[3]	validation-rmse:10.21045                                                       
[4]	validation-rmse:9.82403                                                        
[5]	validation-rmse:9.47601                                                        
[6]	validation-rmse:9.16333                                                        
[7]	validation-rmse:8.88293                                                        
[8]	validation-rmse:8.63208                                                        
[9]	validation-rmse:8.40758                                                        
[10]	validation-rmse:8.20816                                                       
[11]	validation-rmse:8.02978                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.22145                                                       
[1]	validation-rmse:8.90261                                                        
[2]	validation-rmse:8.05170                                                        
[3]	validation-rmse:7.51900                                                        
[4]	validation-rmse:7.18786                                                        
[5]	validation-rmse:6.98229                                                        
[6]	validation-rmse:6.85319                                                        
[7]	validation-rmse:6.77167                                                        
[8]	validation-rmse:6.71570                                                        
[9]	validation-rmse:6.67833                                                        
[10]	validation-rmse:6.65320                                                       
[11]	validation-rmse:6.63302                                                

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:11.44835                                                       
[2]	validation-rmse:11.10334                                                       
[3]	validation-rmse:10.78103                                                       
[4]	validation-rmse:10.48020                                                       
[5]	validation-rmse:10.20088                                                       
[6]	validation-rmse:9.94036                                                        
[7]	validation-rmse:9.69794                                                        
[8]	validation-rmse:9.47302                                                        
[9]	validation-rmse:9.26412                                                        
[10]	validation-rmse:9.07054                                                       
[11]	validation-rmse:8.89109                                                       
[12]	validation-rmse:8.72450                                                

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:9.72299                                                        
[2]	validation-rmse:8.91805                                                        
[3]	validation-rmse:8.32382                                                        
[4]	validation-rmse:7.88876                                                        
[5]	validation-rmse:7.57265                                                        
[6]	validation-rmse:7.34338                                                        
[7]	validation-rmse:7.17913                                                        
[8]	validation-rmse:7.06039                                                        
[9]	validation-rmse:6.97079                                                        
[10]	validation-rmse:6.90507                                                       
[11]	validation-rmse:6.85882                                                       
[12]	validation-rmse:6.82481                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.72833                                                      
[1]	validation-rmse:8.33038                                                      
[2]	validation-rmse:7.58449                                                      
[3]	validation-rmse:7.19270                                                      
[4]	validation-rmse:6.99083                                                      
[5]	validation-rmse:6.88254                                                      
[6]	validation-rmse:6.81831                                                      
[7]	validation-rmse:6.77864                                                      
[8]	validation-rmse:6.75329                                                      
[9]	validation-rmse:6.73354                                                      
[10]	validation-rmse:6.72119                                                     
[11]	validation-rmse:6.71217                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.05306                                                     
[1]	validation-rmse:10.10996                                                     
[2]	validation-rmse:9.35123                                                      
[3]	validation-rmse:8.74450                                                      
[4]	validation-rmse:8.26322                                                      
[5]	validation-rmse:7.88668                                                      
[6]	validation-rmse:7.58943                                                      
[7]	validation-rmse:7.35740                                                      
[8]	validation-rmse:7.17650                                                      
[9]	validation-rmse:7.03588                                                      
[10]	validation-rmse:6.92559                                                     
[11]	validation-rmse:6.84155                                                     
[12]	validation-

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.54980                                                         
[1]	validation-rmse:9.35764                                                          
[2]	validation-rmse:8.51552                                                          
[3]	validation-rmse:7.93659                                                          
[4]	validation-rmse:7.53959                                                          
[5]	validation-rmse:7.27181                                                          
[6]	validation-rmse:7.08708                                                          
[7]	validation-rmse:6.96346                                                          
[8]	validation-rmse:6.87422                                                          
[9]	validation-rmse:6.81245                                                          
[10]	validation-rmse:6.76711                                                         
[11]	validation-rmse:6.73696                          

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.76653                                                          
[1]	validation-rmse:8.33987                                                          
[2]	validation-rmse:7.54584                                                          
[3]	validation-rmse:7.11448                                                          
[4]	validation-rmse:6.87070                                                          
[5]	validation-rmse:6.73840                                                          
[6]	validation-rmse:6.65782                                                          
[7]	validation-rmse:6.60913                                                          
[8]	validation-rmse:6.57549                                                          
[9]	validation-rmse:6.55542                                                          
[10]	validation-rmse:6.53918                                                         
[11]	validation-rmse:6.52750                          

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.19141                                                       
[1]	validation-rmse:10.34336                                                       
[2]	validation-rmse:9.64445                                                        
[3]	validation-rmse:9.07272                                                        
[4]	validation-rmse:8.60826                                                        
[5]	validation-rmse:8.23333                                                        
[6]	validation-rmse:7.93160                                                        
[7]	validation-rmse:7.68937                                                        
[8]	validation-rmse:7.49671                                                        
[9]	validation-rmse:7.34306                                                        
[10]	validation-rmse:7.21964                                                       
[11]	validation-rmse:7.12090                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.60142                                                        
[1]	validation-rmse:6.81870                                                        
[2]	validation-rmse:6.67128                                                        
[3]	validation-rmse:6.62225                                                        
[4]	validation-rmse:6.61094                                                        
[5]	validation-rmse:6.59962                                                        
[6]	validation-rmse:6.59261                                                        
[7]	validation-rmse:6.58473                                                        
[8]	validation-rmse:6.57872                                                        
[9]	validation-rmse:6.57046                                                        
[10]	validation-rmse:6.56676                                                       
[11]	validation-rmse:6.56199                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.77897                                                       
[1]	validation-rmse:11.37522                                                       
[2]	validation-rmse:11.00017                                                       
[3]	validation-rmse:10.65219                                                       
[4]	validation-rmse:10.32919                                                       
[5]	validation-rmse:10.03064                                                       
[6]	validation-rmse:9.75300                                                        
[7]	validation-rmse:9.49843                                                        
[8]	validation-rmse:9.26183                                                        
[9]	validation-rmse:9.04361                                                        
[10]	validation-rmse:8.84279                                                       
[11]	validation-rmse:8.65809                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.98852                                                          
[1]	validation-rmse:6.97688                                                          
[2]	validation-rmse:6.72363                                                          
[3]	validation-rmse:6.64874                                                          
[4]	validation-rmse:6.61710                                                          
[5]	validation-rmse:6.60398                                                          
[6]	validation-rmse:6.59753                                                          
[7]	validation-rmse:6.59084                                                          
[8]	validation-rmse:6.58281                                                          
[9]	validation-rmse:6.58005                                                          
[10]	validation-rmse:6.57047                                                         
[11]	validation-rmse:6.56562                          

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.59681                                                       
[1]	validation-rmse:9.41399                                                        
[2]	validation-rmse:8.56104                                                        
[3]	validation-rmse:7.96069                                                        
[4]	validation-rmse:7.54095                                                        
[5]	validation-rmse:7.24651                                                        
[6]	validation-rmse:7.04213                                                        
[7]	validation-rmse:6.90104                                                        
[8]	validation-rmse:6.80074                                                        
[9]	validation-rmse:6.72777                                                        
[10]	validation-rmse:6.67439                                                       
[11]	validation-rmse:6.63502                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.16173                                                       
[1]	validation-rmse:8.81392                                                        
[2]	validation-rmse:7.95503                                                        
[3]	validation-rmse:7.41742                                                        
[4]	validation-rmse:7.08951                                                        
[5]	validation-rmse:6.88706                                                        
[6]	validation-rmse:6.76109                                                        
[7]	validation-rmse:6.68101                                                        
[8]	validation-rmse:6.62814                                                        
[9]	validation-rmse:6.59168                                                        
[10]	validation-rmse:6.56504                                                       
[11]	validation-rmse:6.54792                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.66892                                                        
[1]	validation-rmse:8.22395                                                        
[2]	validation-rmse:7.44509                                                        
[3]	validation-rmse:7.02814                                                        
[4]	validation-rmse:6.80452                                                        
[5]	validation-rmse:6.68250                                                        
[6]	validation-rmse:6.61160                                                        
[7]	validation-rmse:6.56929                                                        
[8]	validation-rmse:6.53883                                                        
[9]	validation-rmse:6.52125                                                        
[10]	validation-rmse:6.50791                                                       
[11]	validation-rmse:6.49911                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.87927                                                       
[1]	validation-rmse:9.84364                                                        
[2]	validation-rmse:9.04995                                                        
[3]	validation-rmse:8.44745                                                        
[4]	validation-rmse:7.99697                                                        
[5]	validation-rmse:7.66249                                                        
[6]	validation-rmse:7.41321                                                        
[7]	validation-rmse:7.22921                                                        
[8]	validation-rmse:7.09459                                                        
[9]	validation-rmse:6.99437                                                        
[10]	validation-rmse:6.91899                                                       
[11]	validation-rmse:6.86138                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.21122                                                       
[1]	validation-rmse:10.37224                                                       
[2]	validation-rmse:9.67341                                                        
[3]	validation-rmse:9.09699                                                        
[4]	validation-rmse:8.62201                                                        
[5]	validation-rmse:8.23488                                                        
[6]	validation-rmse:7.91613                                                        
[7]	validation-rmse:7.66050                                                        
[8]	validation-rmse:7.45190                                                        
[9]	validation-rmse:7.28469                                                        
[10]	validation-rmse:7.14911                                                       
[11]	validation-rmse:7.03786                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.47886                                                       
[1]	validation-rmse:10.83266                                                       
[2]	validation-rmse:10.26530                                                       
[3]	validation-rmse:9.76731                                                        
[4]	validation-rmse:9.33321                                                        
[5]	validation-rmse:8.95655                                                        
[6]	validation-rmse:8.62765                                                        
[7]	validation-rmse:8.34552                                                        
[8]	validation-rmse:8.10053                                                        
[9]	validation-rmse:7.89127                                                        
[10]	validation-rmse:7.71072                                                       
[11]	validation-rmse:7.55654                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.76081                                                       
[1]	validation-rmse:11.34176                                                       
[2]	validation-rmse:10.95400                                                       
[3]	validation-rmse:10.59558                                                       
[4]	validation-rmse:10.26478                                                       
[5]	validation-rmse:9.95974                                                        
[6]	validation-rmse:9.67852                                                        
[7]	validation-rmse:9.41991                                                        
[8]	validation-rmse:9.18258                                                        
[9]	validation-rmse:8.96466                                                        
[10]	validation-rmse:8.76512                                                       
[11]	validation-rmse:8.58218                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.20275                                                       
[1]	validation-rmse:10.36051                                                       
[2]	validation-rmse:9.66374                                                        
[3]	validation-rmse:9.09276                                                        
[4]	validation-rmse:8.62556                                                        
[5]	validation-rmse:8.24710                                                        
[6]	validation-rmse:7.94191                                                        
[7]	validation-rmse:7.69331                                                        
[8]	validation-rmse:7.49501                                                        
[9]	validation-rmse:7.33371                                                        
[10]	validation-rmse:7.20598                                                       
[11]	validation-rmse:7.10356                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.00391                                                       
[1]	validation-rmse:10.03362                                                       
[2]	validation-rmse:9.25991                                                        
[3]	validation-rmse:8.64868                                                        
[4]	validation-rmse:8.17178                                                        
[5]	validation-rmse:7.79869                                                        
[6]	validation-rmse:7.51120                                                        
[7]	validation-rmse:7.29032                                                        
[8]	validation-rmse:7.11922                                                        
[9]	validation-rmse:6.98755                                                        
[10]	validation-rmse:6.88405                                                       
[11]	validation-rmse:6.80498                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.34005                                                       
[1]	validation-rmse:10.58865                                                       
[2]	validation-rmse:9.94426                                                        
[3]	validation-rmse:9.39535                                                        
[4]	validation-rmse:8.92799                                                        
[5]	validation-rmse:8.53299                                                        
[6]	validation-rmse:8.19981                                                        
[7]	validation-rmse:7.92068                                                        
[8]	validation-rmse:7.68628                                                        
[9]	validation-rmse:7.49253                                                        
[10]	validation-rmse:7.33183                                                       
[11]	validation-rmse:7.19600                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.71664                                                       
[1]	validation-rmse:11.25882                                                       
[2]	validation-rmse:10.83712                                                       
[3]	validation-rmse:10.44928                                                       
[4]	validation-rmse:10.09326                                                       
[5]	validation-rmse:9.76626                                                        
[6]	validation-rmse:9.46674                                                        
[7]	validation-rmse:9.19354                                                        
[8]	validation-rmse:8.94296                                                        
[9]	validation-rmse:8.71436                                                        
[10]	validation-rmse:8.50592                                                       
[11]	validation-rmse:8.31691                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:9.20227                                                        
[1]	validation-rmse:7.76077                                                        
[2]	validation-rmse:7.11440                                                        
[3]	validation-rmse:6.82550                                                        
[4]	validation-rmse:6.69095                                                        
[5]	validation-rmse:6.62039                                                        
[6]	validation-rmse:6.58052                                                        
[7]	validation-rmse:6.56009                                                        
[8]	validation-rmse:6.54681                                                        
[9]	validation-rmse:6.53736                                                        
[10]	validation-rmse:6.52931                                                       
[11]	validation-rmse:6.52736                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.32568                                                       
[1]	validation-rmse:9.03175                                                        
[2]	validation-rmse:8.16740                                                        
[3]	validation-rmse:7.60415                                                        
[4]	validation-rmse:7.24080                                                        
[5]	validation-rmse:7.00736                                                        
[6]	validation-rmse:6.85797                                                        
[7]	validation-rmse:6.75707                                                        
[8]	validation-rmse:6.68904                                                        
[9]	validation-rmse:6.64366                                                        
[10]	validation-rmse:6.61218                                                       
[11]	validation-rmse:6.58933                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.26144                                                       
[1]	validation-rmse:10.45869                                                       
[2]	validation-rmse:9.78523                                                        
[3]	validation-rmse:9.22277                                                        
[4]	validation-rmse:8.75611                                                        
[5]	validation-rmse:8.37314                                                        
[6]	validation-rmse:8.05458                                                        
[7]	validation-rmse:7.79854                                                        
[8]	validation-rmse:7.58386                                                        
[9]	validation-rmse:7.41021                                                        
[10]	validation-rmse:7.27075                                                       
[11]	validation-rmse:7.15491                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.52480                                                       
[1]	validation-rmse:10.91513                                                       
[2]	validation-rmse:10.37409                                                       
[3]	validation-rmse:9.89407                                                        
[4]	validation-rmse:9.47473                                                        
[5]	validation-rmse:9.10449                                                        
[6]	validation-rmse:8.77710                                                        
[7]	validation-rmse:8.49347                                                        
[8]	validation-rmse:8.24316                                                        
[9]	validation-rmse:8.02472                                                        
[10]	validation-rmse:7.83640                                                       
[11]	validation-rmse:7.67551                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.93972                                                       
[1]	validation-rmse:9.93508                                                        
[2]	validation-rmse:9.14984                                                        
[3]	validation-rmse:8.54337                                                        
[4]	validation-rmse:8.08028                                                        
[5]	validation-rmse:7.72852                                                        
[6]	validation-rmse:7.46254                                                        
[7]	validation-rmse:7.26212                                                        
[8]	validation-rmse:7.11022                                                        
[9]	validation-rmse:6.99448                                                        
[10]	validation-rmse:6.90499                                                       
[11]	validation-rmse:6.83453                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:11.62465                                                       
[1]	validation-rmse:11.09206                                                       
[2]	validation-rmse:10.61154                                                       
[3]	validation-rmse:10.17798                                                       
[4]	validation-rmse:9.78755                                                        
[5]	validation-rmse:9.43800                                                        
[6]	validation-rmse:9.12294                                                        
[7]	validation-rmse:8.84209                                                        
[8]	validation-rmse:8.59031                                                        
[9]	validation-rmse:8.36754                                                        
[10]	validation-rmse:8.16813                                                       
[11]	validation-rmse:7.99160                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.52316                                                        
[1]	validation-rmse:7.19836                                                        
[2]	validation-rmse:6.75609                                                        
[3]	validation-rmse:6.59139                                                        
[4]	validation-rmse:6.52598                                                        
[5]	validation-rmse:6.49357                                                        
[6]	validation-rmse:6.47072                                                        
[7]	validation-rmse:6.45595                                                        
[8]	validation-rmse:6.45068                                                        
[9]	validation-rmse:6.44552                                                        
[10]	validation-rmse:6.44126                                                       
[11]	validation-rmse:6.43687                                                

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:10.00826                                                       
[1]	validation-rmse:8.63930                                                        
[2]	validation-rmse:7.82120                                                        
[3]	validation-rmse:7.34420                                                        
[4]	validation-rmse:7.06595                                                        
[5]	validation-rmse:6.90151                                                        
[6]	validation-rmse:6.80365                                                        
[7]	validation-rmse:6.74194                                                        
[8]	validation-rmse:6.70189                                                        
[9]	validation-rmse:6.67253                                                        
[10]	validation-rmse:6.65503                                                       
[11]	validation-rmse:6.64086                                                

In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

mlflow.sklearn.autolog()

for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.csv")
        mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.csv")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        mlmodel = model_class()
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)
        

In [22]:
import os
os.getcwd()

'/workspaces/mlops/02-experiment_tracking'

In [6]:
!ls

duration_prediction.ipynb  mlflow.db  mlruns  models  requirements.txt


In [25]:
dv.get_feature_names_out()

array(['PU_DO=100_168', 'PU_DO=100_180', 'PU_DO=100_190', ...,
       'PU_DO=9_97', 'PU_DO=9_98', 'trip_distance'],
      shape=(13221,), dtype=object)

In [23]:
dv.get_feature_names_out()

array(['PU_DO=100_168', 'PU_DO=100_180', 'PU_DO=100_190', ...,
       'PU_DO=9_97', 'PU_DO=9_98', 'trip_distance'],
      shape=(13221,), dtype=object)

In [21]:
X_val[0]

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2 stored elements and shape (1, 13221)>

In [24]:
mlflow.xgboost.autolog(disable=True)

In [27]:
with mlflow.start_run():
    
    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        'objective': 'reg:linear',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    mlflow.log_params(best_params)

    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    # mlflow.xgboost.log_model(booster, artifact_path="models_mlflow", registered_model_name="best_xgboost_model", input_example=X_val[:5], signature=dv.get_feature_names_out())
    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow", registered_model_name="best_xgboost_model", input_example=X_val[0])

  self.starting_round = model.num_boosted_rounds()


[0]	validation-rmse:11.44482
[1]	validation-rmse:10.77202
[2]	validation-rmse:10.18363
[3]	validation-rmse:9.67396
[4]	validation-rmse:9.23166
[5]	validation-rmse:8.84808
[6]	validation-rmse:8.51883
[7]	validation-rmse:8.23597
[8]	validation-rmse:7.99320
[9]	validation-rmse:7.78709
[10]	validation-rmse:7.61022
[11]	validation-rmse:7.45952
[12]	validation-rmse:7.33049
[13]	validation-rmse:7.22098
[14]	validation-rmse:7.12713
[15]	validation-rmse:7.04752
[16]	validation-rmse:6.98005
[17]	validation-rmse:6.92232
[18]	validation-rmse:6.87112
[19]	validation-rmse:6.82740
[20]	validation-rmse:6.78995
[21]	validation-rmse:6.75792
[22]	validation-rmse:6.72994
[23]	validation-rmse:6.70547
[24]	validation-rmse:6.68390
[25]	validation-rmse:6.66421
[26]	validation-rmse:6.64806
[27]	validation-rmse:6.63280
[28]	validation-rmse:6.61924
[29]	validation-rmse:6.60773
[30]	validation-rmse:6.59777
[31]	validation-rmse:6.58875
[32]	validation-rmse:6.58107
[33]	validation-rmse:6.57217
[34]	validation-rmse:

  xgb_model.save_model(model_data_path)
Successfully registered model 'best_xgboost_model'.
Created version '1' of model 'best_xgboost_model'.


In [25]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

mlflow.sklearn.autolog()

for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        mlflow.log_param("train-data-path", "../data/green_tripdata_2021-01.csv")
        mlflow.log_param("valid-data-path", "../data/green_tripdata_2021-02.csv")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        mlmodel = model_class()
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)
        

: 