In [1]:
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# sk learn imports for the ML model
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR


from sklearn.metrics import mean_squared_error

## MLflow import

In [2]:
import mlflow

# Point MLflow at the local SQLite tracking store, then select the experiment
# that the runs started later in this notebook are logged under.
mlflow.set_tracking_uri(uri="sqlite:///mlflow.db")
mlflow.set_experiment(experiment_name="nyc-taxi-experiment")

<Experiment: artifact_location=('/workspaces/MLOps-Model-Development-to-Production-deployment/Experiment '
 'Tracking using MLFlow/mlruns/1'), creation_time=1749424860439, experiment_id='1', last_update_time=1749424860439, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [3]:
# Read the January 2025 green-taxi trip records and preview the first rows.
df = pd.read_parquet(
    path='/workspaces/MLOps-Model-Development-to-Production-deployment/Data/green_tripdata_2025-01.parquet'
)
df.head(n=5)

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,...,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,cbd_congestion_fee
0,2,2025-01-01 00:03:01,2025-01-01 00:17:12,N,1.0,75,235,1.0,5.93,24.7,...,0.5,6.8,0.0,,1.0,34.0,1.0,1.0,0.0,0.0
1,2,2025-01-01 00:19:59,2025-01-01 00:25:52,N,1.0,166,75,1.0,1.32,8.6,...,0.5,0.0,0.0,,1.0,11.1,2.0,1.0,0.0,0.0
2,2,2025-01-01 00:05:29,2025-01-01 00:07:21,N,5.0,171,73,1.0,0.41,25.55,...,0.0,0.0,0.0,,1.0,26.55,2.0,2.0,0.0,0.0
3,2,2025-01-01 00:52:24,2025-01-01 01:07:52,N,1.0,74,223,1.0,4.12,21.2,...,0.5,6.13,6.94,,1.0,36.77,1.0,1.0,0.0,0.0
4,2,2025-01-01 00:25:05,2025-01-01 01:01:10,N,1.0,66,158,1.0,4.71,33.8,...,0.5,7.81,0.0,,1.0,46.86,1.0,1.0,2.75,0.0


## Preprocessing of data

In [4]:
# Trip duration in minutes, computed with the vectorised `.dt` accessor
# instead of calling a Python lambda once per row.
df['duration'] = (df.lpep_dropoff_datetime - df.lpep_pickup_datetime).dt.total_seconds() / 60

# Keep only trips lasting between 1 and 60 minutes (inclusive). The explicit
# `.copy()` makes `df` an independent frame, so the column assignment below
# does not write into a slice of the unfiltered data (SettingWithCopyWarning).
df = df[(df.duration >= 1) & (df.duration <= 60)].copy()

categorical = ['PULocationID', 'DOLocationID']
numerical = ['trip_distance']

# Location IDs are identifiers, not quantities: cast them to strings so they
# are handled as categories rather than as numeric values.
df[categorical] = df[categorical].astype(str)

In [5]:
# Baseline: vectorise the categorical + numerical columns, fit a plain linear
# regression, and score it on the same rows it was trained on.
dv = DictVectorizer()
train_dicts = df[categorical + numerical].to_dict('records')
X_train = dv.fit_transform(train_dicts)

target = 'duration'
y_train = df[target].values

lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_train)

# Training-set MSE; left as the last expression so the notebook displays it.
mean_squared_error(y_true=y_train, y_pred=y_pred)

44.94737843983856

In [6]:
def preprocess_df(filename):
    """Load a green-taxi trip file and prepare it for duration modelling.

    Parameters
    ----------
    filename : str or path-like
        Path to a ``.csv`` or ``.parquet`` file containing the columns
        ``lpep_pickup_datetime``, ``lpep_dropoff_datetime``, ``PULocationID``
        and ``DOLocationID``.

    Returns
    -------
    pandas.DataFrame
        An independent frame with an added ``duration`` column (minutes),
        restricted to trips lasting between 1 and 60 minutes inclusive, and
        with both location-ID columns cast to ``str``.

    Raises
    ------
    ValueError
        If ``filename`` does not end in ``.csv`` or ``.parquet``.
    """
    name = str(filename)  # also accept pathlib.Path and other path-likes

    if name.endswith('.csv'):
        df = pd.read_csv(filename)

        # CSV stores timestamps as text; parquet already carries datetimes.
        df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
        df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])
    elif name.endswith('.parquet'):
        df = pd.read_parquet(filename)
    else:
        # Previously this fell through and crashed later with an
        # UnboundLocalError on `df`; fail early with a clear message instead.
        raise ValueError(
            f"Unsupported file type for {name!r}: expected a .csv or .parquet file"
        )

    # Trip duration in minutes (vectorised instead of a per-row lambda).
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # `.copy()` detaches the filtered rows so the assignment below (and any
    # column a caller adds later) does not trigger SettingWithCopyWarning.
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [7]:
# January is the training month, February the validation month.
df_train, df_val = (
    preprocess_df(parquet_path)
    for parquet_path in (
        '/workspaces/MLOps-Model-Development-to-Production-deployment/Data/green_tripdata_2025-01.parquet',
        '/workspaces/MLOps-Model-Development-to-Production-deployment/Data/green_tripdata_2025-02.parquet',
    )
)

print(df_train.shape, df_val.shape)

(46307, 22) (44218, 22)


In [8]:
# Treat each pickup/dropoff pair as one combined categorical feature.
categorical = ['PU_DO']
numerical = ['trip_distance']

df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

train_dicts = df_train[categorical + numerical].to_dict('records')
val_dicts = df_val[categorical + numerical].to_dict('records')

# Fit the vectorizer on the training records only; the validation records are
# transformed with that already-fitted vectorizer.
dv = DictVectorizer()
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [9]:
# Regression target (trip duration in minutes) for each split.
target = 'duration'
y_train, y_val = (split[target].values for split in (df_train, df_val))

In [10]:
# Fit on January, predict February, and display the validation MSE.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_val)

mean_squared_error(y_true=y_val, y_pred=y_pred)

37.19536000471952

In [11]:
import os

# Create the output directory first so a fresh checkout does not fail with
# FileNotFoundError, then persist the fitted vectorizer together with the
# model as a single (dv, lr) tuple.
os.makedirs('models', exist_ok=True)
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [12]:
# Track a Lasso model as a single MLflow run: tag, parameters, then metric.
with mlflow.start_run():
    mlflow.set_tag(key="developer", value="keyush")

    # Record which data files this run was trained and validated on.
    mlflow.log_param(
        key="train-dataPath",
        value="/workspaces/MLOps-Model-Development-to-Production-deployment/Data/green_tripdata_2025-01.parquet",
    )
    mlflow.log_param(
        key="val-dataPath",
        value="/workspaces/MLOps-Model-Development-to-Production-deployment/Data/green_tripdata_2025-02.parquet",
    )

    alpha = 0.01
    mlflow.log_param(key="alpha", value=alpha)

    lr = Lasso(alpha=alpha).fit(X_train, y_train)
    y_pred = lr.predict(X_val)

    mse = mean_squared_error(y_true=y_val, y_pred=y_pred)
    mlflow.log_metric(key="mse", value=mse)

In [13]:
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [14]:
# Wrap each split's feature matrix and target in an XGBoost DMatrix.
train, valid = (
    xgb.DMatrix(data=features, label=labels)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

### fmin()
This is the main minimization function.
You pass an objective function (which trains a model and returns the loss to minimize, e.g. RMSE), a search space, and a search algorithm.
It returns the best set of hyperparameters it found.

#### tpe (Tree-structured Parzen Estimator)
A Bayesian optimization algorithm.
Learns from previous trials to decide what parameters to try next (smarter than random search).

#### hp

Used to define the search space of hyperparameters.
Examples:

- `hp.uniform('learning_rate', 0.01, 0.3)` — a float drawn uniformly between 0.01 and 0.3
- `hp.choice('max_depth', [3, 5, 7, 10])` — one of the listed values

#### scope (from hyperopt.pyll)
Used to apply transformations to sampled hyperparameters.

Example:

`scope.int(hp.quniform('max_depth', 3, 10, 1))`
This means: `hp.quniform` samples values from 3 to 10 in steps of 1 (returned as floats), and `scope.int` casts each sampled value to an integer.


In [21]:
import mlflow.xgboost


def objective_eval(params):
    """Train one XGBoost model for a hyperopt trial and log it to MLflow.

    Parameters
    ----------
    params : dict
        XGBoost training parameters sampled by hyperopt for this trial. They
        are logged to MLflow as-is and passed straight to ``xgb.train``.

    Returns
    -------
    dict
        ``{"loss": <validation RMSE>, "status": STATUS_OK}`` in the form
        hyperopt's ``fmin`` expects from an objective function.

    Notes
    -----
    Reads the notebook-level ``train`` and ``valid`` DMatrix objects and the
    ``y_val`` array. Each call opens its own MLflow run.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost_2")
        mlflow.log_params(params)
        xg = xgb.train(
            params,
            dtrain=train,
            # Upper bound on boosting rounds; early stopping usually ends the
            # trial well before this limit is reached.
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            # Stop once the validation metric has not improved for 50
            # consecutive rounds.
            early_stopping_rounds=50,
        )
        y_pred = xg.predict(valid)
        # `mean_squared_error(..., squared=False)` was deprecated in
        # scikit-learn 1.4 and removed in 1.6, so take the square root of the
        # MSE explicitly; `float()` hands hyperopt a plain Python number.
        rmse = float(np.sqrt(mean_squared_error(y_val, y_pred)))
        mlflow.log_metric("rmse", rmse)
        mlflow.xgboost.log_model(xg, artifact_path="models_mlflow")

    return {"loss": rmse, "status": STATUS_OK}

In [22]:
# `hp` describes how each hyperparameter is sampled; `scope.int` casts the
# float produced by `hp.quniform` to an integer for `max_depth`.
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    # loguniform(-3, 0) samples between exp(-3) ~= 0.05 and exp(0) = 1.
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    # Squared-error regression for the continuous duration target.
    # 'reg:squarederror' replaces the deprecated alias 'reg:linear'.
    'objective': 'reg:squarederror',
    'seed': 42
}

best_result = fmin(
    fn=objective_eval,
    space=search_space,
    algo=tpe.suggest,  # Tree-structured Parzen Estimator search
    max_evals=50,
    trials=Trials()  # records the outcome of every evaluated trial
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.62174                           
[1]	validation-rmse:8.19759                           
[2]	validation-rmse:7.82780                           
[3]	validation-rmse:7.50611                           
[4]	validation-rmse:7.22790                           
[5]	validation-rmse:6.98744                           
[6]	validation-rmse:6.78090                           
[7]	validation-rmse:6.60383                           
[8]	validation-rmse:6.45268                           
[9]	validation-rmse:6.32340                           
[10]	validation-rmse:6.21304                          
[11]	validation-rmse:6.11983                          
  0%|          | 0/50 [00:03<?, ?trial/s, best loss=?]

[12]	validation-rmse:6.04037                          
[13]	validation-rmse:5.97304                          
[14]	validation-rmse:5.91571                          
[15]	validation-rmse:5.86670                          
[16]	validation-rmse:5.82533                          
[17]	validation-rmse:5.79001                          
[18]	validation-rmse:5.75916                          
[19]	validation-rmse:5.73213                          
[20]	validation-rmse:5.70922                          
[21]	validation-rmse:5.68995                          
[22]	validation-rmse:5.67312                          
[23]	validation-rmse:5.65843                          
[24]	validation-rmse:5.64453                          
[25]	validation-rmse:5.63171                          
[26]	validation-rmse:5.62186                          
[27]	validation-rmse:5.61236                          
[28]	validation-rmse:5.60423                          
[29]	validation-rmse:5.59653                          
[30]	valid

  xgb_model.save_model(model_data_path)




  2%|▏         | 1/50 [01:51<1:30:43, 111.09s/trial, best loss: 5.459416790729642]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.30580                                                       
[1]	validation-rmse:7.66203                                                       
[2]	validation-rmse:7.15002                                                       
[3]	validation-rmse:6.74535                                                       
[4]	validation-rmse:6.42995                                                       
[5]	validation-rmse:6.18610                                                       
[6]	validation-rmse:5.99502                                                       
[7]	validation-rmse:5.84984                                                       
[8]	validation-rmse:5.73982                                                       
[9]	validation-rmse:5.65657                                                       
[10]	validation-rmse:5.59180                                                      
[11]	validation-rmse:5.54314                                                      
[12]

  xgb_model.save_model(model_data_path)




  4%|▍         | 2/50 [02:32<55:57, 69.95s/trial, best loss: 5.331994161317094]   

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.91140                                                    
[1]	validation-rmse:7.09042                                                    
[2]	validation-rmse:6.53401                                                    
[3]	validation-rmse:6.16785                                                    
[4]	validation-rmse:5.92665                                                    
[5]	validation-rmse:5.76699                                                    
[6]	validation-rmse:5.65918                                                    
[7]	validation-rmse:5.58697                                                    
[8]	validation-rmse:5.54140                                                    
[9]	validation-rmse:5.50581                                                    
[10]	validation-rmse:5.48245                                                   
[11]	validation-rmse:5.46234                                                   
[12]	validation-rmse:5.45173            

  xgb_model.save_model(model_data_path)




  6%|▌         | 3/50 [02:56<38:36, 49.28s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.23409                                                    
[1]	validation-rmse:6.32716                                                    
[2]	validation-rmse:5.91407                                                    
[3]	validation-rmse:5.72846                                                    
[4]	validation-rmse:5.62696                                                    
[5]	validation-rmse:5.58143                                                    
[6]	validation-rmse:5.55120                                                    
[7]	validation-rmse:5.53330                                                    
[8]	validation-rmse:5.52084                                                    
[9]	validation-rmse:5.51191                                                    
[10]	validation-rmse:5.50421                                                   
[11]	validation-rmse:5.49835                                                   
[12]	validation-rmse:5.49421            

  xgb_model.save_model(model_data_path)




  8%|▊         | 4/50 [03:30<32:57, 42.98s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:5.74205                                                    
[1]	validation-rmse:5.55045                                                    
[2]	validation-rmse:5.53797                                                    
[3]	validation-rmse:5.53949                                                    
[4]	validation-rmse:5.52085                                                    
[5]	validation-rmse:5.51832                                                    
[6]	validation-rmse:5.51967                                                    
[7]	validation-rmse:5.52605                                                    
[8]	validation-rmse:5.52489                                                    
[9]	validation-rmse:5.52535                                                    
[10]	validation-rmse:5.52618                                                   
[11]	validation-rmse:5.52755                                                   
[12]	validation-rmse:5.51913            

  xgb_model.save_model(model_data_path)




 10%|█         | 5/50 [03:43<24:17, 32.38s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.34305                                                    
[1]	validation-rmse:7.72344                                                    
[2]	validation-rmse:7.22595                                                    
[3]	validation-rmse:6.82656                                                    
[4]	validation-rmse:6.51249                                                    
[5]	validation-rmse:6.26366                                                    
[6]	validation-rmse:6.06878                                                    
[7]	validation-rmse:5.91792                                                    
[8]	validation-rmse:5.79984                                                    
[9]	validation-rmse:5.70355                                                    
[10]	validation-rmse:5.63249                                                   
[11]	validation-rmse:5.57586                                                   
[12]	validation-rmse:5.53113            

  xgb_model.save_model(model_data_path)




 12%|█▏        | 6/50 [04:14<23:14, 31.70s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.20586                                                    
[1]	validation-rmse:7.50674                                                    
[2]	validation-rmse:6.97246                                                    
[3]	validation-rmse:6.56981                                                    
[4]	validation-rmse:6.26914                                                    
[5]	validation-rmse:6.04670                                                    
[6]	validation-rmse:5.88406                                                    
[7]	validation-rmse:5.76399                                                    
[8]	validation-rmse:5.67674                                                    
[9]	validation-rmse:5.61044                                                    
[10]	validation-rmse:5.56111                                                   
[11]	validation-rmse:5.52347                                                   
[12]	validation-rmse:5.49636            

  xgb_model.save_model(model_data_path)




 14%|█▍        | 7/50 [04:49<23:38, 32.98s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.85450                                                    
[1]	validation-rmse:6.99216                                                    
[2]	validation-rmse:6.41324                                                    
[3]	validation-rmse:6.03838                                                    
[4]	validation-rmse:5.79660                                                    
[5]	validation-rmse:5.64211                                                    
[6]	validation-rmse:5.54511                                                    
[7]	validation-rmse:5.48798                                                    
[8]	validation-rmse:5.45211                                                    
[9]	validation-rmse:5.42766                                                    
[10]	validation-rmse:5.41627                                                   
[11]	validation-rmse:5.40682                                                   
[12]	validation-rmse:5.40106            

  xgb_model.save_model(model_data_path)




 16%|█▌        | 8/50 [05:23<23:08, 33.06s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:5.79520                                                    
[1]	validation-rmse:5.53163                                                    
[2]	validation-rmse:5.49197                                                    
[3]	validation-rmse:5.47528                                                    
[4]	validation-rmse:5.44046                                                    
[5]	validation-rmse:5.43993                                                    
[6]	validation-rmse:5.42160                                                    
[7]	validation-rmse:5.41710                                                    
[8]	validation-rmse:5.41276                                                    
[9]	validation-rmse:5.40972                                                    
[10]	validation-rmse:5.40486                                                   
[11]	validation-rmse:5.40425                                                   
[12]	validation-rmse:5.40016            

  xgb_model.save_model(model_data_path)




 18%|█▊        | 9/50 [05:34<17:59, 26.32s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.39135                                                    
[1]	validation-rmse:5.76559                                                    
[2]	validation-rmse:5.60786                                                    
[3]	validation-rmse:5.55462                                                    
[4]	validation-rmse:5.52934                                                    
[5]	validation-rmse:5.51608                                                    
[6]	validation-rmse:5.50926                                                    
[7]	validation-rmse:5.49113                                                    
[8]	validation-rmse:5.48374                                                    
[9]	validation-rmse:5.47929                                                    
[10]	validation-rmse:5.47583                                                   
[11]	validation-rmse:5.47296                                                   
[12]	validation-rmse:5.47193            

  xgb_model.save_model(model_data_path)




 20%|██        | 10/50 [05:50<15:19, 22.99s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:5.46677                                                     
[1]	validation-rmse:5.46721                                                     
[2]	validation-rmse:5.47947                                                     
[3]	validation-rmse:5.45417                                                     
[4]	validation-rmse:5.45115                                                     
[5]	validation-rmse:5.44988                                                     
[6]	validation-rmse:5.44809                                                     
[7]	validation-rmse:5.44556                                                     
[8]	validation-rmse:5.44109                                                     
[9]	validation-rmse:5.44214                                                     
[10]	validation-rmse:5.44240                                                    
[11]	validation-rmse:5.44919                                                    
[12]	validation-rmse:5.44387

  xgb_model.save_model(model_data_path)




 22%|██▏       | 11/50 [06:00<12:27, 19.15s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.55996                                                     
[1]	validation-rmse:8.08504                                                     
[2]	validation-rmse:7.67549                                                     
[3]	validation-rmse:7.32151                                                     
[4]	validation-rmse:7.01855                                                     
[5]	validation-rmse:6.75890                                                     
[6]	validation-rmse:6.53778                                                     
[7]	validation-rmse:6.35072                                                     
[8]	validation-rmse:6.19187                                                     
[9]	validation-rmse:6.05750                                                     
[10]	validation-rmse:5.94475                                                    
[11]	validation-rmse:5.84939                                                    
[12]	validation-rmse:5.76977

  xgb_model.save_model(model_data_path)




 24%|██▍       | 12/50 [06:44<16:56, 26.74s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.67433                                                     
[1]	validation-rmse:8.28921                                                     
[2]	validation-rmse:7.94601                                                     
[3]	validation-rmse:7.64216                                                     
[4]	validation-rmse:7.37258                                                     
[5]	validation-rmse:7.13442                                                     
[6]	validation-rmse:6.92889                                                     
[7]	validation-rmse:6.74122                                                     
[8]	validation-rmse:6.58513                                                     
[9]	validation-rmse:6.44112                                                     
[10]	validation-rmse:6.31989                                                    
[11]	validation-rmse:6.20822                                                    
[12]	validation-rmse:6.11610

  xgb_model.save_model(model_data_path)




 26%|██▌       | 13/50 [07:40<21:52, 35.47s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.76351                                                     
[1]	validation-rmse:8.45180                                                     
[2]	validation-rmse:8.16450                                                     
[3]	validation-rmse:7.90582                                                     
[4]	validation-rmse:7.66847                                                     
[5]	validation-rmse:7.45322                                                     
[6]	validation-rmse:7.25492                                                     
[7]	validation-rmse:7.08070                                                     
[8]	validation-rmse:6.92365                                                     
[9]	validation-rmse:6.77199                                                     
[10]	validation-rmse:6.64354                                                    
[11]	validation-rmse:6.52599                                                    
[12]	validation-rmse:6.41358

  xgb_model.save_model(model_data_path)




 28%|██▊       | 14/50 [08:53<28:12, 47.02s/trial, best loss: 5.331994161317094]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.63216                                                     
[1]	validation-rmse:8.21320                                                     
[2]	validation-rmse:7.84600                                                     
[3]	validation-rmse:7.52544                                                     
[4]	validation-rmse:7.24587                                                     
[5]	validation-rmse:7.00012                                                     
[6]	validation-rmse:6.78685                                                     
[7]	validation-rmse:6.60280                                                     
[8]	validation-rmse:6.44232                                                     
[9]	validation-rmse:6.30533                                                     
[10]	validation-rmse:6.18721                                                    
[11]	validation-rmse:6.08730                                                    
[12]	validation-rmse:5.99715

  xgb_model.save_model(model_data_path)




 30%|███       | 15/50 [09:47<28:35, 49.01s/trial, best loss: 5.322694735763029]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.56688                                                     
[1]	validation-rmse:6.65349                                                     
[2]	validation-rmse:6.13532                                                     
[3]	validation-rmse:5.83920                                                     
[4]	validation-rmse:5.67726                                                     
[5]	validation-rmse:5.58178                                                     
[6]	validation-rmse:5.52440                                                     
[7]	validation-rmse:5.48663                                                     
[8]	validation-rmse:5.46404                                                     
[9]	validation-rmse:5.45261                                                     
[10]	validation-rmse:5.44616                                                    
[11]	validation-rmse:5.43849                                                    
[12]	validation-rmse:5.43592

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:7.45390                                                     
 32%|███▏      | 16/50 [10:05<22:24, 39.53s/trial, best loss: 5.322694735763029]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:6.55296                                                     
[2]	validation-rmse:6.08316                                                     
[3]	validation-rmse:5.84310                                                     
[4]	validation-rmse:5.71961                                                     
[5]	validation-rmse:5.65599                                                     
[6]	validation-rmse:5.62266                                                     
[7]	validation-rmse:5.59966                                                     
[8]	validation-rmse:5.57666                                                     
[9]	validation-rmse:5.56808                                                     
[10]	validation-rmse:5.56610                                                    
[11]	validation-rmse:5.56520                                                    
[12]	validation-rmse:5.56437                                                    
[13]	validation-rmse:5.56477

  xgb_model.save_model(model_data_path)




 34%|███▍      | 17/50 [10:25<18:34, 33.79s/trial, best loss: 5.322694735763029]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.78921                                                     
[1]	validation-rmse:8.49552                                                     
[2]	validation-rmse:8.22448                                                     
[3]	validation-rmse:7.97424                                                     
[4]	validation-rmse:7.74388                                                     
[5]	validation-rmse:7.53205                                                     
[6]	validation-rmse:7.33764                                                     
[7]	validation-rmse:7.15875                                                     
[8]	validation-rmse:6.99478                                                     
[9]	validation-rmse:6.84454                                                     
[10]	validation-rmse:6.70735                                                    
[11]	validation-rmse:6.58237                                                    
[12]	validation-rmse:6.46835

  xgb_model.save_model(model_data_path)




 36%|███▌      | 18/50 [12:12<29:40, 55.65s/trial, best loss: 5.322694735763029]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.64803                                                     
[1]	validation-rmse:8.24198                                                     
[2]	validation-rmse:7.88330                                                     
[3]	validation-rmse:7.56673                                                     
[4]	validation-rmse:7.29013                                                     
[5]	validation-rmse:7.04557                                                     
[6]	validation-rmse:6.83399                                                     
[7]	validation-rmse:6.64791                                                     
[8]	validation-rmse:6.48421                                                     
[9]	validation-rmse:6.34515                                                     
[10]	validation-rmse:6.22151                                                    
[11]	validation-rmse:6.11597                                                    
[12]	validation-rmse:6.02312

  xgb_model.save_model(model_data_path)




 38%|███▊      | 19/50 [13:09<29:06, 56.34s/trial, best loss: 5.320745007520114]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.84307                                                     
[1]	validation-rmse:5.99587                                                     
[2]	validation-rmse:5.67967                                                     
[3]	validation-rmse:5.55996                                                     
[4]	validation-rmse:5.49966                                                     
[5]	validation-rmse:5.48141                                                     
[6]	validation-rmse:5.47029                                                     
[7]	validation-rmse:5.46891                                                     
[8]	validation-rmse:5.46691                                                     
[9]	validation-rmse:5.46703                                                     
[10]	validation-rmse:5.46650                                                    
[11]	validation-rmse:5.46344                                                    
[12]	validation-rmse:5.46056

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.83952                                                     
[1]	validation-rmse:8.59125                                                     
 40%|████      | 20/50 [13:23<21:46, 43.56s/trial, best loss: 5.320745007520114]

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:8.36056                                                     
[3]	validation-rmse:8.14637                                                     
[4]	validation-rmse:7.94692                                                     
[5]	validation-rmse:7.76270                                                     
[6]	validation-rmse:7.59165                                                     
[7]	validation-rmse:7.43261                                                     
[8]	validation-rmse:7.28603                                                     
[9]	validation-rmse:7.15070                                                     
[10]	validation-rmse:7.02524                                                    
[11]	validation-rmse:6.90962                                                    
[12]	validation-rmse:6.80299                                                    
[13]	validation-rmse:6.70542                                                    
[14]	validation-rmse:6.61554

  xgb_model.save_model(model_data_path)




 42%|████▏     | 21/50 [14:05<20:46, 43.00s/trial, best loss: 5.320745007520114]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.49622                                                     
[1]	validation-rmse:7.98018                                                     
[2]	validation-rmse:7.54829                                                     
[3]	validation-rmse:7.18454                                                     
[4]	validation-rmse:6.88137                                                     
[5]	validation-rmse:6.63139                                                     
[6]	validation-rmse:6.42385                                                     
[7]	validation-rmse:6.25463                                                     
[8]	validation-rmse:6.11375                                                     
[9]	validation-rmse:5.99730                                                     
[10]	validation-rmse:5.90165                                                    
[11]	validation-rmse:5.82343                                                    
[12]	validation-rmse:5.75831

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.50333                                                     
 44%|████▍     | 22/50 [14:57<21:21, 45.78s/trial, best loss: 5.317855614781456]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.99362                                                     
[2]	validation-rmse:7.56263                                                     
[3]	validation-rmse:7.20407                                                     
[4]	validation-rmse:6.90462                                                     
[5]	validation-rmse:6.65956                                                     
[6]	validation-rmse:6.45524                                                     
[7]	validation-rmse:6.28625                                                     
[8]	validation-rmse:6.14862                                                     
[9]	validation-rmse:6.03530                                                     
[10]	validation-rmse:5.94343                                                    
[11]	validation-rmse:5.86793                                                    
[12]	validation-rmse:5.80350                                                    
[13]	validation-rmse:5.75189

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.52836                                                     
[1]	validation-rmse:8.03708                                                    
[2]	validation-rmse:7.62377                                                    
 46%|████▌     | 23/50 [15:45<20:47, 46.20s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:7.27830                                                    
[4]	validation-rmse:6.99087                                                    
[5]	validation-rmse:6.75008                                                    
[6]	validation-rmse:6.55151                                                    
[7]	validation-rmse:6.38983                                                    
[8]	validation-rmse:6.25625                                                    
[9]	validation-rmse:6.14509                                                    
[10]	validation-rmse:6.05398                                                   
[11]	validation-rmse:5.97812                                                   
[12]	validation-rmse:5.91400                                                   
[13]	validation-rmse:5.86277                                                   
[14]	validation-rmse:5.82012                                                   
[15]	validation-rmse:5.78650            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.16701                                                    
 48%|████▊     | 24/50 [16:24<19:09, 44.20s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.45547                                                    
[2]	validation-rmse:6.92374                                                    
[3]	validation-rmse:6.53371                                                    
[4]	validation-rmse:6.24825                                                    
[5]	validation-rmse:6.04068                                                    
[6]	validation-rmse:5.88896                                                    
[7]	validation-rmse:5.78098                                                    
[8]	validation-rmse:5.70089                                                    
[9]	validation-rmse:5.64256                                                    
[10]	validation-rmse:5.59720                                                   
[11]	validation-rmse:5.56337                                                   
[12]	validation-rmse:5.53871                                                   
[13]	validation-rmse:5.51757            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.23161                                                    
[1]	validation-rmse:7.56995                                                    
[2]	validation-rmse:7.07433                                                    
 50%|█████     | 25/50 [16:58<17:07, 41.08s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:6.70668                                                    
[4]	validation-rmse:6.43583                                                    
[5]	validation-rmse:6.23800                                                    
[6]	validation-rmse:6.09561                                                    
[7]	validation-rmse:5.99174                                                    
[8]	validation-rmse:5.91816                                                    
[9]	validation-rmse:5.86154                                                    
[10]	validation-rmse:5.82109                                                   
[11]	validation-rmse:5.78533                                                   
[12]	validation-rmse:5.76032                                                   
[13]	validation-rmse:5.74200                                                   
[14]	validation-rmse:5.72218                                                   
[15]	validation-rmse:5.71267            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.10663                                                    
 52%|█████▏    | 26/50 [17:34<15:52, 39.67s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:7.36733                                                    
[2]	validation-rmse:6.83053                                                    
[3]	validation-rmse:6.44681                                                    
[4]	validation-rmse:6.17722                                                    
[5]	validation-rmse:5.98690                                                    
[6]	validation-rmse:5.85514                                                    
[7]	validation-rmse:5.76363                                                    
[8]	validation-rmse:5.69956                                                    
[9]	validation-rmse:5.65144                                                    
[10]	validation-rmse:5.61662                                                   
[11]	validation-rmse:5.59217                                                   
[12]	validation-rmse:5.57294                                                   
[13]	validation-rmse:5.55903            

  xgb_model.save_model(model_data_path)




 54%|█████▍    | 27/50 [18:26<16:34, 43.24s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.61458                                                    
[1]	validation-rmse:6.68694                                                    
[2]	validation-rmse:6.13393                                                    
[3]	validation-rmse:5.81151                                                    
[4]	validation-rmse:5.62907                                                    
[5]	validation-rmse:5.52769                                                    
[6]	validation-rmse:5.47069                                                    
[7]	validation-rmse:5.43783                                                    
[8]	validation-rmse:5.41840                                                    
[9]	validation-rmse:5.40373                                                    
[10]	validation-rmse:5.39293                                                   
[11]	validation-rmse:5.38513                                                   
[12]	validation-rmse:5.38060            

  xgb_model.save_model(model_data_path)




 56%|█████▌    | 28/50 [18:52<13:58, 38.12s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.71929                                                    
[1]	validation-rmse:8.36892                                                    
[2]	validation-rmse:8.05160                                                    
[3]	validation-rmse:7.76584                                                    
[4]	validation-rmse:7.50892                                                    
[5]	validation-rmse:7.27834                                                    
[6]	validation-rmse:7.07182                                                    
[7]	validation-rmse:6.88755                                                    
[8]	validation-rmse:6.72292                                                    
[9]	validation-rmse:6.57645                                                    
[10]	validation-rmse:6.44653                                                   
[11]	validation-rmse:6.33102                                                   
[12]	validation-rmse:6.22804            

  xgb_model.save_model(model_data_path)




 58%|█████▊    | 29/50 [20:16<18:08, 51.85s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.41742                                                    
[1]	validation-rmse:7.84974                                                    
[2]	validation-rmse:7.38629                                                    
[3]	validation-rmse:7.01045                                                    
[4]	validation-rmse:6.70815                                                    
[5]	validation-rmse:6.46690                                                    
[6]	validation-rmse:6.27386                                                    
[7]	validation-rmse:6.12132                                                    
[8]	validation-rmse:6.00000                                                    
[9]	validation-rmse:5.90511                                                    
[10]	validation-rmse:5.83049                                                   
[11]	validation-rmse:5.77165                                                   
[12]	validation-rmse:5.72380            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.53789                                                    
 60%|██████    | 30/50 [21:10<17:29, 52.47s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.05131                                                    
[2]	validation-rmse:7.63661                                                    
[3]	validation-rmse:7.28440                                                    
[4]	validation-rmse:6.98650                                                    
[5]	validation-rmse:6.73671                                                    
[6]	validation-rmse:6.52825                                                    
[7]	validation-rmse:6.35111                                                    
[8]	validation-rmse:6.20537                                                    
[9]	validation-rmse:6.08300                                                    
[10]	validation-rmse:5.98251                                                   
[11]	validation-rmse:5.89831                                                   
[12]	validation-rmse:5.82922                                                   
[13]	validation-rmse:5.76976            

  xgb_model.save_model(model_data_path)




 62%|██████▏   | 31/50 [22:02<16:39, 52.59s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.51677                                                    
[1]	validation-rmse:8.01410                                                    
[2]	validation-rmse:7.58781                                                    
[3]	validation-rmse:7.22883                                                    
[4]	validation-rmse:6.92800                                                    
[5]	validation-rmse:6.67664                                                    
[6]	validation-rmse:6.46789                                                    
[7]	validation-rmse:6.29529                                                    
[8]	validation-rmse:6.15206                                                    
[9]	validation-rmse:6.03324                                                    
[10]	validation-rmse:5.93637                                                   
[11]	validation-rmse:5.85672                                                   
[12]	validation-rmse:5.79150            

  xgb_model.save_model(model_data_path)




 64%|██████▍   | 32/50 [22:54<15:40, 52.25s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.75241                                                    
[1]	validation-rmse:8.42963                                                    
[2]	validation-rmse:8.13528                                                    
[3]	validation-rmse:7.86719                                                    
[4]	validation-rmse:7.62364                                                    
[5]	validation-rmse:7.40323                                                    
[6]	validation-rmse:7.20360                                                    
[7]	validation-rmse:7.02332                                                    
[8]	validation-rmse:6.85944                                                    
[9]	validation-rmse:6.71340                                                    
[10]	validation-rmse:6.58108                                                   
[11]	validation-rmse:6.46203                                                   
[12]	validation-rmse:6.35503            

  xgb_model.save_model(model_data_path)




 66%|██████▌   | 33/50 [24:00<15:56, 56.27s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.32661                                                    
[1]	validation-rmse:7.69643                                                    
[2]	validation-rmse:7.19416                                                    
[3]	validation-rmse:6.79584                                                    
[4]	validation-rmse:6.48379                                                    
[5]	validation-rmse:6.23887                                                    
[6]	validation-rmse:6.05053                                                    
[7]	validation-rmse:5.90497                                                    
[8]	validation-rmse:5.79434                                                    
[9]	validation-rmse:5.70824                                                    
[10]	validation-rmse:5.64152                                                   
[11]	validation-rmse:5.58781                                                   
[12]	validation-rmse:5.54793            

  xgb_model.save_model(model_data_path)




 68%|██████▊   | 34/50 [24:39<13:39, 51.19s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.93447                                                    
[1]	validation-rmse:7.13442                                                    
[2]	validation-rmse:6.60102                                                    
[3]	validation-rmse:6.25367                                                    
[4]	validation-rmse:6.02906                                                    
[5]	validation-rmse:5.88596                                                    
[6]	validation-rmse:5.79008                                                    
[7]	validation-rmse:5.72672                                                    
[8]	validation-rmse:5.68227                                                    
[9]	validation-rmse:5.65064                                                    
[10]	validation-rmse:5.63122                                                   
[11]	validation-rmse:5.61643                                                   
[12]	validation-rmse:5.60719            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.59844                                                    
 70%|███████   | 35/50 [25:14<11:33, 46.24s/trial, best loss: 5.30541866194849]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.15710                                                    
[2]	validation-rmse:7.77270                                                    
[3]	validation-rmse:7.44069                                                    
[4]	validation-rmse:7.15333                                                    
[5]	validation-rmse:6.90788                                                    
[6]	validation-rmse:6.69658                                                    
[7]	validation-rmse:6.51577                                                    
[8]	validation-rmse:6.36160                                                    
[9]	validation-rmse:6.23202                                                    
[10]	validation-rmse:6.12098                                                   
[11]	validation-rmse:6.02524                                                   
[12]	validation-rmse:5.94519                                                   
[13]	validation-rmse:5.87746            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.83732                                                    
[1]	validation-rmse:8.58722                                                    
[2]	validation-rmse:8.35552                                                    
[3]	validation-rmse:8.13972                                                    
 72%|███████▏  | 36/50 [26:04<11:04, 47.46s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:7.94034                                                    
[5]	validation-rmse:7.75625                                                    
[6]	validation-rmse:7.58498                                                    
[7]	validation-rmse:7.42739                                                    
[8]	validation-rmse:7.28251                                                    
[9]	validation-rmse:7.14762                                                    
[10]	validation-rmse:7.02447                                                   
[11]	validation-rmse:6.91128                                                   
[12]	validation-rmse:6.80646                                                   
[13]	validation-rmse:6.71157                                                   
[14]	validation-rmse:6.62349                                                   
[15]	validation-rmse:6.54278                                                   
[16]	validation-rmse:6.46871            

  xgb_model.save_model(model_data_path)




 74%|███████▍  | 37/50 [26:42<09:41, 44.70s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.39700                                                    
[1]	validation-rmse:7.81012                                                    
[2]	validation-rmse:7.32796                                                    
[3]	validation-rmse:6.93594                                                    
[4]	validation-rmse:6.61826                                                    
[5]	validation-rmse:6.36349                                                    
[6]	validation-rmse:6.15989                                                    
[7]	validation-rmse:5.99746                                                    
[8]	validation-rmse:5.86924                                                    
[9]	validation-rmse:5.76783                                                    
[10]	validation-rmse:5.68834                                                   
[11]	validation-rmse:5.62669                                                   
[12]	validation-rmse:5.57787            

  xgb_model.save_model(model_data_path)




 76%|███████▌  | 38/50 [27:29<09:03, 45.33s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.70207                                                    
[1]	validation-rmse:8.33881                                                    
[2]	validation-rmse:8.01336                                                    
[3]	validation-rmse:7.72261                                                    
[4]	validation-rmse:7.46369                                                    
[5]	validation-rmse:7.23324                                                    
[6]	validation-rmse:7.02863                                                    
[7]	validation-rmse:6.84787                                                    
[8]	validation-rmse:6.68782                                                    
[9]	validation-rmse:6.54672                                                    
[10]	validation-rmse:6.42266                                                   
[11]	validation-rmse:6.31344                                                   
[12]	validation-rmse:6.21750            

  xgb_model.save_model(model_data_path)




 78%|███████▊  | 39/50 [28:40<09:43, 53.06s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.26008                                                    
[1]	validation-rmse:7.59686                                                    
[2]	validation-rmse:7.08307                                                    
[3]	validation-rmse:6.68898                                                    
[4]	validation-rmse:6.39080                                                    
[5]	validation-rmse:6.16720                                                    
[6]	validation-rmse:5.99799                                                    
[7]	validation-rmse:5.87140                                                    
[8]	validation-rmse:5.77904                                                    
[9]	validation-rmse:5.70762                                                    
[10]	validation-rmse:5.65509                                                   
[11]	validation-rmse:5.61555                                                   
[12]	validation-rmse:5.58594            

  xgb_model.save_model(model_data_path)




 80%|████████  | 40/50 [29:22<08:16, 49.68s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.99705                                                    
[1]	validation-rmse:7.19947                                                    
[2]	validation-rmse:6.63959                                                    
[3]	validation-rmse:6.25375                                                    
[4]	validation-rmse:5.99163                                                    
[5]	validation-rmse:5.81534                                                    
[6]	validation-rmse:5.69795                                                    
[7]	validation-rmse:5.61815                                                    
[8]	validation-rmse:5.56334                                                    
[9]	validation-rmse:5.52653                                                    
[10]	validation-rmse:5.49935                                                   
[11]	validation-rmse:5.48005                                                   
[12]	validation-rmse:5.46527            

  xgb_model.save_model(model_data_path)




 82%|████████▏ | 41/50 [29:51<06:32, 43.62s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:6.82300                                                    
[1]	validation-rmse:5.97781                                                    
[2]	validation-rmse:5.68618                                                    
[3]	validation-rmse:5.57754                                                    
[4]	validation-rmse:5.52947                                                    
[5]	validation-rmse:5.50996                                                    
[6]	validation-rmse:5.49375                                                    
[7]	validation-rmse:5.48323                                                    
[8]	validation-rmse:5.47582                                                    
[9]	validation-rmse:5.46704                                                    
[10]	validation-rmse:5.46395                                                   
[11]	validation-rmse:5.45486                                                   
[12]	validation-rmse:5.45287            

  xgb_model.save_model(model_data_path)




 84%|████████▍ | 42/50 [30:11<04:51, 36.45s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.56883                                                    
[1]	validation-rmse:8.10105                                                    
[2]	validation-rmse:7.69561                                                    
[3]	validation-rmse:7.34567                                                    
[4]	validation-rmse:7.04418                                                    
[5]	validation-rmse:6.78696                                                    
[6]	validation-rmse:6.56592                                                    
[7]	validation-rmse:6.37939                                                    
[8]	validation-rmse:6.22047                                                    
[9]	validation-rmse:6.08651                                                    
[10]	validation-rmse:5.97308                                                   
[11]	validation-rmse:5.87620                                                   
[12]	validation-rmse:5.79572            

  xgb_model.save_model(model_data_path)




 86%|████████▌ | 43/50 [30:54<04:28, 38.39s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.77876                                                    
[1]	validation-rmse:6.89132                                                    
[2]	validation-rmse:6.31590                                                    
[3]	validation-rmse:5.95717                                                    
[4]	validation-rmse:5.72965                                                    
[5]	validation-rmse:5.59335                                                    
[6]	validation-rmse:5.51318                                                    
[7]	validation-rmse:5.46561                                                    
[8]	validation-rmse:5.43689                                                    
[9]	validation-rmse:5.41955                                                    
[10]	validation-rmse:5.40649                                                   
[11]	validation-rmse:5.39876                                                   
[12]	validation-rmse:5.39345            

  xgb_model.save_model(model_data_path)




 88%|████████▊ | 44/50 [31:25<03:37, 36.28s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.23963                                                    
[1]	validation-rmse:7.57169                                                    
[2]	validation-rmse:7.05193                                                    
[3]	validation-rmse:6.65920                                                    
[4]	validation-rmse:6.36307                                                    
[5]	validation-rmse:6.14139                                                    
[6]	validation-rmse:5.97017                                                    
[7]	validation-rmse:5.84915                                                    
[8]	validation-rmse:5.75013                                                    
[9]	validation-rmse:5.68468                                                    
[10]	validation-rmse:5.63364                                                   
[11]	validation-rmse:5.58891                                                   
[12]	validation-rmse:5.56132            

  xgb_model.save_model(model_data_path)




 90%|█████████ | 45/50 [31:56<02:53, 34.70s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:8.76410                                                    
[1]	validation-rmse:8.45184                                                    
[2]	validation-rmse:8.16690                                                    
[3]	validation-rmse:7.90808                                                    
[4]	validation-rmse:7.67194                                                    
[5]	validation-rmse:7.45898                                                    
[6]	validation-rmse:7.26572                                                    
[7]	validation-rmse:7.09065                                                    
[8]	validation-rmse:6.93128                                                    
[9]	validation-rmse:6.78946                                                    
[10]	validation-rmse:6.66177                                                   
[11]	validation-rmse:6.54596                                                   
[12]	validation-rmse:6.44151            

  xgb_model.save_model(model_data_path)




 92%|█████████▏| 46/50 [32:43<02:33, 38.38s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.67605                                                    
[1]	validation-rmse:6.75956                                                    
[2]	validation-rmse:6.19438                                                    
[3]	validation-rmse:5.85548                                                    
[4]	validation-rmse:5.65468                                                    
[5]	validation-rmse:5.54559                                                    
[6]	validation-rmse:5.48089                                                    
[7]	validation-rmse:5.44427                                                    
[8]	validation-rmse:5.42227                                                    
[9]	validation-rmse:5.40925                                                    
[10]	validation-rmse:5.40175                                                   
[11]	validation-rmse:5.39440                                                   
[12]	validation-rmse:5.39066            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:8.61076                                                    
 94%|█████████▍| 47/50 [33:20<01:53, 37.79s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:8.17697                                                    
[2]	validation-rmse:7.79934                                                    
[3]	validation-rmse:7.47054                                                    
[4]	validation-rmse:7.18370                                                    
[5]	validation-rmse:6.93596                                                    
[6]	validation-rmse:6.72347                                                    
[7]	validation-rmse:6.54128                                                    
[8]	validation-rmse:6.38460                                                    
[9]	validation-rmse:6.25023                                                    
[10]	validation-rmse:6.13679                                                   
[11]	validation-rmse:6.03733                                                   
[12]	validation-rmse:5.95388                                                   
[13]	validation-rmse:5.88133            

  xgb_model.save_model(model_data_path)




[0]	validation-rmse:6.01503                                                    
 96%|█████████▌| 48/50 [34:15<01:25, 42.95s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:5.64083                                                    
[2]	validation-rmse:5.57767                                                    
[3]	validation-rmse:5.56948                                                    
[4]	validation-rmse:5.56237                                                    
[5]	validation-rmse:5.55325                                                    
[6]	validation-rmse:5.54390                                                    
[7]	validation-rmse:5.53913                                                    
[8]	validation-rmse:5.53273                                                    
[9]	validation-rmse:5.52185                                                    
[10]	validation-rmse:5.51772                                                   
[11]	validation-rmse:5.51319                                                   
[12]	validation-rmse:5.51198                                                   
[13]	validation-rmse:5.50657            

  xgb_model.save_model(model_data_path)




 98%|█████████▊| 49/50 [34:27<00:33, 33.65s/trial, best loss: 5.30110649565218]

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:7.36505                                                    
[1]	validation-rmse:6.45505                                                    
[2]	validation-rmse:6.00475                                                    
[3]	validation-rmse:5.78528                                                    
[4]	validation-rmse:5.67402                                                    
[5]	validation-rmse:5.60719                                                    
[6]	validation-rmse:5.56865                                                    
[7]	validation-rmse:5.54574                                                    
[8]	validation-rmse:5.53078                                                    
[9]	validation-rmse:5.51774                                                    
[10]	validation-rmse:5.50732                                                   
[11]	validation-rmse:5.50074                                                   
[12]	validation-rmse:5.49425            

  xgb_model.save_model(model_data_path)




100%|██████████| 50/50 [34:51<00:00, 41.83s/trial, best loss: 5.30110649565218]


### Using the best model
#### 1. Load the model as a Python function (PyFuncModel)
#### 2. Load the model as an XGBoost model

##### We can take any of the runs (or any of their variations) and use it for predictions, either as a Python function or as a native object of XGBoost or whichever model library we trained with. As shown above in the MLflow UI, a PyFuncModel lets us pass Pandas DataFrame features to its predict method, unlike a native XGBoost model, which requires us to convert the features into the DMatrix type first.

In [15]:
# Model URI of the saved run we chose, in the form runs:/<run_id>/<artifact_path>
logged_model = "runs:/5792742ef5324a609c91590ed7b96c7f/models_mlflow"

# Load the logged model through MLflow's generic python-function (pyfunc) flavor
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

# Display the loaded wrapper's metadata (artifact path, flavor and run id)
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: 5792742ef5324a609c91590ed7b96c7f

In [16]:
# X_val is presumably the validation feature set already transformed by the
# DictVectorizer in an earlier cell; the transform is kept here for reference:
# X_val = dv.transform(X_val_dict)

# Build the DMatrix form of the validation set.
# NOTE(review): the pyfunc prediction below is given X_val, not `valid`;
# confirm whether `valid` is still needed by another cell.
valid = xgb.DMatrix(data=X_val, label=y_val)

# Predict with the pyfunc wrapper directly on the un-converted features
loaded_model.predict(data=X_val)

array([ 5.1470017, 26.56458  , 28.101507 , ..., 19.14343  , 16.275436 ,
       23.078476 ], dtype=float32)

In [17]:
# Alternatively, load the same logged model with MLflow's native XGBoost flavor.
# Because the result is an XGBoost object, predicting with it requires the
# validation features to be supplied as a DMatrix.
xgb_model = mlflow.xgboost.load_model(model_uri=logged_model)

# Display the loaded object's repr
xgb_model

<xgboost.core.Booster at 0x7e715305bb20>

In [None]:
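## Autolog does not work in this environment because the conda env's libstdc++ is too old
## (see the GLIBCXX ImportError in this cell's output), so the code below is left commented out.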

# mlflow.set_tag("additional_models", "others")
# mlflow.set_tag("run_by", "keyush")

#     ## we can also log different params

# mlflow.sklearn.autolog()

# for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

#     with mlflow.start_run():

#         mlflow.log_param("train-dataPath", "/workspaces/MLOps-Model-Development-to-Production-deployment/Data/green_tripdata_2025-01.parquet")
#         mlflow.log_param("val-dataPath", "/workspaces/MLOps-Model-Development-to-Production-deployment/Data/green_tripdata_2025-02.parquet")
#         mlflow.log_artifact("models_mlflow/preprocessor.b", artifact_path="preprocessor")

#         mlmodel = model_class()
#         mlmodel.fit(X_train, y_train)

#         y_pred = mlmodel.predict(X_val)
#         rmse = mean_squared_error(y_val, y_pred, squared=False)
#         mlflow.log_metric("rmse", rmse)


ImportError: /opt/conda/envs/mlflow-exp/bin/../lib/libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /opt/conda/envs/mlflow-exp/lib/python3.10/site-packages/scipy/fft/_pocketfft/pypocketfft.cpython-310-x86_64-linux-gnu.so)