In [1]:
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

from sklearn.metrics import root_mean_squared_error

In [2]:
import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experimental")

<Experiment: artifact_location='/home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/mlruns/2', creation_time=1748046038801, experiment_id='2', last_update_time=1748046038801, lifecycle_stage='active', name='nyc-taxi-experimental', tags={}>

In [3]:
!mlflow --version

mlflow, version 2.22.0


In [5]:
df = pd.read_parquet("../data/green/green_tripdata_2023-01.parquet")

In [6]:
df_val = pd.read_parquet("../data/green/green_tripdata_2023-02.parquet")

In [12]:
categorical = ['PULocationID', 'DOLocationID']
numerical = ['trip_distance']

df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
df.duration = df.duration.dt.total_seconds() / 60
df = df[df.duration >= 1 & (df.duration < 60)]

train_dicts = df[categorical + numerical].to_dict(orient='records')
dv = DictVectorizer()

X_train = dv.fit_transform(train_dicts)

target = 'duration'
y_train = df[target].values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.duration = df.duration.dt.total_seconds() / 60


In [15]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_train)

rmse = root_mean_squared_error(y_train, y_pred)

In [16]:
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr, f_out))

FileNotFoundError: [Errno 2] No such file or directory: 'models/lin_reg.bin'

In [11]:
with mlflow.start_run():
    
    mlflow.set_tag("developer", "pastor")
    mlflow.log_param("train-data-path", "../data/green/green_tripdata_2023-01.parquet")
    

    
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    
    y_pred = lr.predict(X_train)
    
    rmse = root_mean_squared_error(y_train, y_pred)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle/")

MlflowException: Invalid artifact path: 'models_pickle/'. Names may be treated as files in certain cases, and must not resolve to other names when treated as such. This name would resolve to 'models_pickle'

In [17]:
def preprocess(df: pd.DataFrame, dv: DictVectorizer, fit_dv: bool = False):
    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.dt.total_seconds() / 60
    df = df[df.duration >= 1 & (df.duration < 60)]
    
    categorical = ['PULocationID', 'DOLocationID']
    numerical = ['trip_distance']
    target = 'duration'
    
    df[categorical] = df[categorical].astype(str)

    
    y = df[target].values

    dicts = df[categorical + numerical].to_dict(orient='records')
    if fit_dv:
        X = dv.fit_transform(dicts)
    else:
        X = dv.transform(dicts)
    return X, y, dv


In [18]:
dv = DictVectorizer()
X_train, y_train, dv = preprocess(df, dv, fit_dv=True)

In [19]:
X_val, y_val, _ = preprocess(df_val, dv, fit_dv=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[categorical] = df[categorical].astype(str)


In [20]:
import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [21]:
train = xgb.DMatrix(X_train, label=y_train)

In [22]:
valid = xgb.DMatrix(X_val, label=y_val)

In [40]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, "validation")],
            early_stopping_rounds=50,
        )
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)
    return {'loss': rmse, 'status': STATUS_OK}
            

In [44]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),  # exp(-3), exp(0) [0.005 - 1]
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),  # exp(-5), exp(-1) 0.00001 - 0.1
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),  # exp(-6), exp(-1) 0.000001 - 0.1
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objevtive': 'reg_linear',
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials())
    

  0%|                                                                                                                 | 0/50 [00:00<?, ?trial/s, best loss=?]

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.04574                                                                                                                                 
[1]	validation-rmse:70.13097                                                                                                                                 
[2]	validation-rmse:70.33594                                                                                                                                 
[3]	validation-rmse:70.58692                                                                                                                                 
[4]	validation-rmse:70.90579                                                                                                                                 
[5]	validation-rmse:71.31401                                                                                                                                 
[6]	validation-rmse:71.72145                        

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:69.89906                                                                                                                                 
[3]	validation-rmse:69.88788                                                                                                                                 
[4]	validation-rmse:69.88548                                                                                                                                 
[5]	validation-rmse:69.90460                                                                                                                                 
[6]	validation-rmse:69.92343                                                                                                                                 
[7]	validation-rmse:69.95123                                                                                                                                 
[8]	validation-rmse:69.98697                        

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:70.81926                                                                                                                                 
[2]	validation-rmse:71.59645                                                                                                                                 
[3]	validation-rmse:72.32986                                                                                                                                 
[4]	validation-rmse:72.99891                                                                                                                                 
[5]	validation-rmse:73.55081                                                                                                                                 
[6]	validation-rmse:74.02018                                                                                                                                 
[7]	validation-rmse:74.33984                        

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:70.01827                                                                                                                                 
[3]	validation-rmse:70.15649                                                                                                                                 
[4]	validation-rmse:70.31880                                                                                                                                 
[5]	validation-rmse:70.45201                                                                                                                                 
[6]	validation-rmse:70.57926                                                                                                                                 
[7]	validation-rmse:70.69113                                                                                                                                 
[8]	validation-rmse:70.77290                        

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.08339                                                                                                                                 
[1]	validation-rmse:70.33588                                                                                                                                 
[2]	validation-rmse:70.81011                                                                                                                                 
[3]	validation-rmse:71.29526                                                                                                                                 
[4]	validation-rmse:71.83622                                                                                                                                 
[5]	validation-rmse:72.48344                                                                                                                                 
[6]	validation-rmse:72.99319                        

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:69.99044
[1]	validation-rmse:70.21905                                                                                                                                 
[2]	validation-rmse:70.51440                                                                                                                                 
[3]	validation-rmse:70.86739                                                                                                                                 
[4]	validation-rmse:71.15647                                                                                                                                 
[5]	validation-rmse:71.41003                                                                                                                                 
[6]	validation-rmse:71.72386                                                                                                                                 
[7]	validation-rmse:72.

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.94924                                                                                                                                 
[1]	validation-rmse:71.94888                                                                                                                                 
[2]	validation-rmse:72.73755                                                                                                                                 
[3]	validation-rmse:73.38538                                                                                                                                 
[4]	validation-rmse:73.97324                                                                                                                                 
[5]	validation-rmse:74.33324                                                                                                                                 
[6]	validation-rmse:74.64505                        

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:73.38157                                                                                                                
[3]	validation-rmse:74.09117                                                                                                                
[4]	validation-rmse:75.31558                                                                                                                
[5]	validation-rmse:76.09352                                                                                                                
[6]	validation-rmse:77.03958                                                                                                                
[7]	validation-rmse:77.67348                                                                                                                
[8]	validation-rmse:77.91891                                                                                                                
[9]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.66528                                                                                                                
[1]	validation-rmse:72.01752                                                                                                                
[2]	validation-rmse:73.34710                                                                                                                
[3]	validation-rmse:74.66773                                                                                                                
[4]	validation-rmse:75.71817                                                                                                                
[5]	validation-rmse:76.69469                                                                                                                
[6]	validation-rmse:77.39790                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.05867                                                                                                                
[1]	validation-rmse:70.10722                                                                                                                
[2]	validation-rmse:70.20821                                                                                                                
[3]	validation-rmse:70.37695                                                                                                                
[4]	validation-rmse:70.57776                                                                                                                
[5]	validation-rmse:70.80855                                                                                                                
[6]	validation-rmse:71.04459                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:71.82414                                                                                                                
[1]	validation-rmse:73.07333                                                                                                                
[2]	validation-rmse:73.63269                                                                                                                
[3]	validation-rmse:74.17811                                                                                                                
[4]	validation-rmse:74.24403                                                                                                                
[5]	validation-rmse:74.70996                                                                                                                
[6]	validation-rmse:74.71485                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:69.86212                                                                                                                
[2]	validation-rmse:69.90918                                                                                                                
[3]	validation-rmse:69.96038                                                                                                                
[4]	validation-rmse:70.06201                                                                                                                
[5]	validation-rmse:70.15761                                                                                                                
[6]	validation-rmse:70.25681                                                                                                                
[7]	validation-rmse:70.33755                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:72.43945                                                                                                                
[2]	validation-rmse:72.71579                                                                                                                
[3]	validation-rmse:72.82495                                                                                                                
[4]	validation-rmse:72.84536                                                                                                                
[5]	validation-rmse:72.93882                                                                                                                
[6]	validation-rmse:73.04508                                                                                                                
[7]	validation-rmse:73.19545                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.60863                                                                                                                
[1]	validation-rmse:72.12117                                                                                                                
[2]	validation-rmse:73.99236                                                                                                                
[3]	validation-rmse:75.76772                                                                                                                
[4]	validation-rmse:77.29240                                                                                                                
[5]	validation-rmse:78.59105                                                                                                                
[6]	validation-rmse:79.64497                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[2]	validation-rmse:72.25392                                                                                                                
[3]	validation-rmse:73.10430                                                                                                                
[4]	validation-rmse:73.60756                                                                                                                
[5]	validation-rmse:74.22703                                                                                                                
[6]	validation-rmse:74.55844                                                                                                                
[7]	validation-rmse:74.99536                                                                                                                
[8]	validation-rmse:75.09771                                                                                                                
[9]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.86325                                                                                                                
[1]	validation-rmse:72.40428                                                                                                                
[2]	validation-rmse:73.84763                                                                                                                
[3]	validation-rmse:75.22758                                                                                                                
[4]	validation-rmse:76.31795                                                                                                                
[5]	validation-rmse:77.22763                                                                                                                
[6]	validation-rmse:77.78446                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[10]	validation-rmse:77.52733                                                                                                               
[11]	validation-rmse:77.57017                                                                                                               
[12]	validation-rmse:77.80104                                                                                                               
[13]	validation-rmse:78.77294                                                                                                               
[14]	validation-rmse:79.50134                                                                                                               
[15]	validation-rmse:79.62371                                                                                                               
[16]	validation-rmse:79.85758                                                                                                               
[17]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:70.01049                                                                                                                
[2]	validation-rmse:70.02347                                                                                                                
[3]	validation-rmse:70.06782                                                                                                                
[4]	validation-rmse:70.14098                                                                                                                
[5]	validation-rmse:70.23600                                                                                                                
[6]	validation-rmse:70.34937                                                                                                                
[7]	validation-rmse:70.46554                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.93714                                                                                                                
[1]	validation-rmse:73.29149                                                                                                                
[2]	validation-rmse:75.50715                                                                                                                
[3]	validation-rmse:77.42128                                                                                                                
[4]	validation-rmse:79.05204                                                                                                                
[5]	validation-rmse:80.26668                                                                                                                
[6]	validation-rmse:81.14251                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[9]	validation-rmse:71.85347                                                                                                                
[10]	validation-rmse:71.95180                                                                                                               
[11]	validation-rmse:72.09988                                                                                                               
[12]	validation-rmse:72.20402                                                                                                               
[13]	validation-rmse:72.27418                                                                                                               
[14]	validation-rmse:72.28988                                                                                                               
[15]	validation-rmse:72.24211                                                                                                               
[16]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:69.89671                                                                                                                
[2]	validation-rmse:69.87573                                                                                                                
[3]	validation-rmse:69.87925                                                                                                                
[4]	validation-rmse:69.89175                                                                                                                
[5]	validation-rmse:69.91648                                                                                                                
[6]	validation-rmse:69.96822                                                                                                                
[7]	validation-rmse:70.01862                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:69.99617                                                                                                                
[1]	validation-rmse:69.94223                                                                                                                
[2]	validation-rmse:69.93472                                                                                                                
[3]	validation-rmse:69.95076                                                                                                                
[4]	validation-rmse:69.98276                                                                                                                
[5]	validation-rmse:70.03651                                                                                                                
[6]	validation-rmse:70.09489                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.00411                                                                                                                
[1]	validation-rmse:70.02235                                                                                                                
[2]	validation-rmse:70.12991                                                                                                                
[3]	validation-rmse:70.27147                                                                                                                
[4]	validation-rmse:70.45523                                                                                                                
[5]	validation-rmse:70.63707                                                                                                                
[6]	validation-rmse:70.84908                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:69.93542                                                                                                                
[2]	validation-rmse:69.95286                                                                                                                
[3]	validation-rmse:69.99940                                                                                                                
[4]	validation-rmse:70.06226                                                                                                                
[5]	validation-rmse:70.14073                                                                                                                
[6]	validation-rmse:70.22678                                                                                                                
[7]	validation-rmse:70.31591                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.01563                                                                                                                
[1]	validation-rmse:69.96372                                                                                                                
[2]	validation-rmse:69.94830                                                                                                                
[3]	validation-rmse:69.95038                                                                                                                
[4]	validation-rmse:69.96172                                                                                                                
[5]	validation-rmse:69.98810                                                                                                                
[6]	validation-rmse:70.02580                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.00868                                                                                                                
[1]	validation-rmse:70.12100                                                                                                                
[2]	validation-rmse:70.36539                                                                                                                
[3]	validation-rmse:70.63567                                                                                                                
[4]	validation-rmse:70.93826                                                                                                                
[5]	validation-rmse:71.22532                                                                                                                
[6]	validation-rmse:71.50078                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:69.86448                                                                                                                
[2]	validation-rmse:69.88183                                                                                                                
[3]	validation-rmse:69.91306                                                                                                                
[4]	validation-rmse:69.96435                                                                                                                
[5]	validation-rmse:70.03472                                                                                                                
[6]	validation-rmse:70.10862                                                                                                                
[7]	validation-rmse:70.15554                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[15]	validation-rmse:69.71539                                                                                                               
[16]	validation-rmse:69.70945                                                                                                               
[17]	validation-rmse:69.70689                                                                                                               
[18]	validation-rmse:69.70095                                                                                                               
[19]	validation-rmse:69.69942                                                                                                               
[20]	validation-rmse:69.69602                                                                                                               
[21]	validation-rmse:69.69459                                                                                                               
[22]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[16]	validation-rmse:69.68449                                                                                                               
[17]	validation-rmse:69.67694                                                                                                               
[18]	validation-rmse:69.67021                                                                                                               
[19]	validation-rmse:69.66717                                                                                                               
[20]	validation-rmse:69.66185                                                                                                               
[21]	validation-rmse:69.65695                                                                                                               
[22]	validation-rmse:69.65354                                                                                                               
[23]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[14]	validation-rmse:69.69626                                                                                                               
[15]	validation-rmse:69.68710                                                                                                               
[16]	validation-rmse:69.68007                                                                                                               
[17]	validation-rmse:69.67383                                                                                                               
[18]	validation-rmse:69.66694                                                                                                               
[19]	validation-rmse:69.66157                                                                                                               
[20]	validation-rmse:69.65629                                                                                                               
[21]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[8]	validation-rmse:69.92813                                                                                                                
[9]	validation-rmse:69.94054                                                                                                                
[10]	validation-rmse:69.94232                                                                                                               
[11]	validation-rmse:69.94915                                                                                                               
[12]	validation-rmse:69.94835                                                                                                               
[13]	validation-rmse:69.95808                                                                                                               
[14]	validation-rmse:69.97117                                                                                                               
[15]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:70.00859                                                                                                                
[2]	validation-rmse:70.06869                                                                                                                
[3]	validation-rmse:70.16201                                                                                                                
[4]	validation-rmse:70.30696                                                                                                                
[5]	validation-rmse:70.47239                                                                                                                
[6]	validation-rmse:70.65910                                                                                                                
[7]	validation-rmse:70.81587                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[13]	validation-rmse:69.77386                                                                                                               
[14]	validation-rmse:69.76629                                                                                                               
[15]	validation-rmse:69.76783                                                                                                               
[16]	validation-rmse:69.76827                                                                                                               
[17]	validation-rmse:69.76905                                                                                                               
[18]	validation-rmse:69.77336                                                                                                               
[19]	validation-rmse:69.77915                                                                                                               
[20]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:72.26930                                                                                                                
[2]	validation-rmse:73.01983                                                                                                                
[3]	validation-rmse:73.64587                                                                                                                
[4]	validation-rmse:74.03313                                                                                                                
[5]	validation-rmse:74.20100                                                                                                                
[6]	validation-rmse:74.45058                                                                                                                
[7]	validation-rmse:74.50484                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[15]	validation-rmse:69.68448                                                                                                               
[16]	validation-rmse:69.68787                                                                                                               
[17]	validation-rmse:69.68683                                                                                                               
[18]	validation-rmse:69.68543                                                                                                               
[19]	validation-rmse:69.68830                                                                                                               
[20]	validation-rmse:69.69072                                                                                                               
[21]	validation-rmse:69.69067                                                                                                               
[22]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[6]	validation-rmse:70.14253                                                                                                                
[7]	validation-rmse:70.17815                                                                                                                
[8]	validation-rmse:70.25175                                                                                                                
[9]	validation-rmse:70.29443                                                                                                                
[10]	validation-rmse:70.35100                                                                                                               
[11]	validation-rmse:70.38974                                                                                                               
[12]	validation-rmse:70.45025                                                                                                               
[13]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.08152                                                                                                                
[1]	validation-rmse:70.37143                                                                                                                
[2]	validation-rmse:70.88591                                                                                                                
[3]	validation-rmse:71.55030                                                                                                                
[4]	validation-rmse:72.17846                                                                                                                
[5]	validation-rmse:72.86040                                                                                                                
[6]	validation-rmse:73.56275                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:69.96598                                                                                                                
[4]	validation-rmse:69.98757                                                                                                                
[5]	validation-rmse:70.03094                                                                                                                
[6]	validation-rmse:70.07859                                                                                                                
[7]	validation-rmse:70.13415                                                                                                                
[8]	validation-rmse:70.19675                                                                                                                
[9]	validation-rmse:70.25477                                                                                                                
[10]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[5]	validation-rmse:69.96339                                                                                                                
[6]	validation-rmse:69.97777                                                                                                                
[7]	validation-rmse:69.99830                                                                                                                
[8]	validation-rmse:70.03224                                                                                                                
[9]	validation-rmse:70.06550                                                                                                                
[10]	validation-rmse:70.09622                                                                                                               
[11]	validation-rmse:70.12676                                                                                                               
[12]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.24715                                                                                                                
[1]	validation-rmse:70.89942                                                                                                                
[2]	validation-rmse:71.66279                                                                                                                
[3]	validation-rmse:72.56846                                                                                                                
[4]	validation-rmse:73.38293                                                                                                                
[5]	validation-rmse:74.24558                                                                                                                
[6]	validation-rmse:74.89351                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[17]	validation-rmse:69.69001                                                                                                               
[18]	validation-rmse:69.70120                                                                                                               
[19]	validation-rmse:69.72143                                                                                                               
[20]	validation-rmse:69.72873                                                                                                               
[21]	validation-rmse:69.75229                                                                                                               
[22]	validation-rmse:69.76381                                                                                                               
[23]	validation-rmse:69.77489                                                                                                               
[24]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[10]	validation-rmse:69.95860                                                                                                               
[11]	validation-rmse:69.97256                                                                                                               
[12]	validation-rmse:69.99359                                                                                                               
[13]	validation-rmse:70.02026                                                                                                               
[14]	validation-rmse:70.04078                                                                                                               
[15]	validation-rmse:70.05239                                                                                                               
[16]	validation-rmse:70.06385                                                                                                               
[17]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:70.00241                                                                                                                
[2]	validation-rmse:70.01219                                                                                                                
[3]	validation-rmse:70.05003                                                                                                                
[4]	validation-rmse:70.10896                                                                                                                
[5]	validation-rmse:70.18850                                                                                                                
[6]	validation-rmse:70.28089                                                                                                                
[7]	validation-rmse:70.39529                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.04229                                                                                                                
[1]	validation-rmse:70.01626                                                                                                                
[2]	validation-rmse:70.02227                                                                                                                
[3]	validation-rmse:70.05040                                                                                                                
[4]	validation-rmse:70.10597                                                                                                                
[5]	validation-rmse:70.17982                                                                                                                
[6]	validation-rmse:70.27494                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[4]	validation-rmse:70.19255                                                                                                                
[5]	validation-rmse:70.31063                                                                                                                
[6]	validation-rmse:70.44799                                                                                                                
[7]	validation-rmse:70.55946                                                                                                                
[8]	validation-rmse:70.69677                                                                                                                
[9]	validation-rmse:70.81602                                                                                                                
[10]	validation-rmse:70.91438                                                                                                               
[11]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[1]	validation-rmse:70.86480                                                                                                                
[2]	validation-rmse:71.21871                                                                                                                
[3]	validation-rmse:71.39824                                                                                                                
[4]	validation-rmse:71.58853                                                                                                                
[5]	validation-rmse:71.67979                                                                                                                
[6]	validation-rmse:71.87953                                                                                                                
[7]	validation-rmse:72.01822                                                                                                                
[8]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:70.19281                                                                                                                
[1]	validation-rmse:70.70447                                                                                                                
[2]	validation-rmse:71.39292                                                                                                                
[3]	validation-rmse:72.16406                                                                                                                
[4]	validation-rmse:72.87249                                                                                                                
[5]	validation-rmse:73.66248                                                                                                                
[6]	validation-rmse:74.32693                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[0]	validation-rmse:71.32241                                                                                                                
[1]	validation-rmse:73.25065                                                                                                                
[2]	validation-rmse:74.95032                                                                                                                
[3]	validation-rmse:76.29575                                                                                                                
[4]	validation-rmse:77.14286                                                                                                                
[5]	validation-rmse:77.70828                                                                                                                
[6]	validation-rmse:78.13244                                                                                                                
[7]	validatio

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[3]	validation-rmse:69.98641                                                                                                                
[4]	validation-rmse:70.01267                                                                                                                
[5]	validation-rmse:70.05697                                                                                                                
[6]	validation-rmse:70.10391                                                                                                                
[7]	validation-rmse:70.16748                                                                                                                
[8]	validation-rmse:70.23316                                                                                                                
[9]	validation-rmse:70.29511                                                                                                                
[10]	validati

Parameters: { "objevtive" } are not used.

  self.starting_round = model.num_boosted_rounds()



[8]	validation-rmse:70.09183                                                                                                                
[9]	validation-rmse:70.11602                                                                                                                
[10]	validation-rmse:70.15310                                                                                                               
[11]	validation-rmse:70.19078                                                                                                               
[12]	validation-rmse:70.22945                                                                                                               
[13]	validation-rmse:70.24185                                                                                                               
[14]	validation-rmse:70.25423                                                                                                               
[15]	validati

In [31]:
with mlflow.start_run():  
    parameters = {
        'learning_rate': 0.05380677154145605,
        'max_depth': 4,
        'min_child_weight': 3.754106686202785,
        'objective': 'reg:linear',  # Fixed typo from 'objevtive'
        'reg_alpha': 0.07023067397868384,
        'reg_lambda': 0.0024864712355091413,
        'seed': 42
    }
    
    #mlflow.xgboost.autolog()
    mlflow.log_params(parameters)
    booster = xgb.train(
        params = parameters,
        dtrain = train,
        num_boost_round = 100,
        evals = [(valid, "validation")],
        early_stopping_rounds = 50
    )
    
    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)
    
    with open("mlruns/models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
        
    mlflow.log_artifact("mlruns/models/preprocessor.b", artifact_path="preprocessor")
    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

[0]	validation-rmse:70.04860
[1]	validation-rmse:69.99760
[2]	validation-rmse:69.95275
[3]	validation-rmse:69.91017
[4]	validation-rmse:69.87780
[5]	validation-rmse:69.85012
[6]	validation-rmse:69.82176
[7]	validation-rmse:69.80178
[8]	validation-rmse:69.78116
[9]	validation-rmse:69.76031
[10]	validation-rmse:69.74763
[11]	validation-rmse:69.73369
[12]	validation-rmse:69.71935
[13]	validation-rmse:69.70630
[14]	validation-rmse:69.69626
[15]	validation-rmse:69.68710
[16]	validation-rmse:69.68007
[17]	validation-rmse:69.67383


  self.starting_round = model.num_boosted_rounds()


[18]	validation-rmse:69.66694
[19]	validation-rmse:69.66157
[20]	validation-rmse:69.65629
[21]	validation-rmse:69.65479
[22]	validation-rmse:69.65368
[23]	validation-rmse:69.65123
[24]	validation-rmse:69.64921
[25]	validation-rmse:69.64640
[26]	validation-rmse:69.64613
[27]	validation-rmse:69.64656
[28]	validation-rmse:69.64444
[29]	validation-rmse:69.64570
[30]	validation-rmse:69.64640
[31]	validation-rmse:69.64216
[32]	validation-rmse:69.64059
[33]	validation-rmse:69.63693
[34]	validation-rmse:69.63811
[35]	validation-rmse:69.63782
[36]	validation-rmse:69.63513
[37]	validation-rmse:69.63615
[38]	validation-rmse:69.63373
[39]	validation-rmse:69.62959
[40]	validation-rmse:69.62692
[41]	validation-rmse:69.62686
[42]	validation-rmse:69.62479
[43]	validation-rmse:69.62387
[44]	validation-rmse:69.62030
[45]	validation-rmse:69.62236
[46]	validation-rmse:69.62079
[47]	validation-rmse:69.61887
[48]	validation-rmse:69.61929
[49]	validation-rmse:69.62207
[50]	validation-rmse:69.62125
[51]	valid

  xgb_model.save_model(model_data_path)


In [32]:
logged_model = 'runs:/b1d6141c84984ae783deccf199d5e749/models_mlflow'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [33]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: b1d6141c84984ae783deccf199d5e749

In [34]:
xgboost_model = mlflow.xgboost.load_model(logged_model)

In [35]:
xgboost_model

<xgboost.core.Booster at 0x7db731cb0380>

In [36]:
xgboost_model.predict(valid)

array([39.895706, 28.665522, 31.035696, ..., 29.027748, 24.789118,
       20.100262], shape=(63185,), dtype=float32)

## Deployment

In [1]:
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [9]:
client.create_experiment(name="my-cool-experiment")

'5'

To get the best run methods you can use:

In [11]:
from mlflow.entities import ViewType
runs = client.search_runs(
    experiment_ids=1,
    filter_string = "",
    run_view_type = ViewType.ACTIVE_ONLY,
    max_results = 5,
    order_by = ["metrics.rmse ASC"]
    
)

In [14]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse {run.data.metrics['rmse']:.4f}")

run id: 5e2da22b91d345bfa1deb2d66ecb87e8, rmse 5.4312


## Promote

In [17]:
import mlflow

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [19]:
run_id = "5e2da22b91d345bfa1deb2d66ecb87e8"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-regressor")

Successfully registered model 'nyc-regressor'.
Created version '1' of model 'nyc-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1748200577919, current_stage='None', description=None, last_updated_timestamp=1748200577919, name='nyc-regressor', run_id='5e2da22b91d345bfa1deb2d66ecb87e8', run_link=None, source='/home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/mlruns/1/5e2da22b91d345bfa1deb2d66ecb87e8/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [1]:
import mlflow

In [2]:
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'file:///home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/mlruns'


In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

mlflow.set_experiment("my-experiment-1")

with mlflow.start_run():

    X, y = load_iris(return_X_y=True)

    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))

    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

Traceback (most recent call last):
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 329, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 427, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 1373, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 1366, in _read_helper
    result = read_yaml(root, file_name)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/utils/file_utils.py", line 310, in read_yaml
    raise 

default artifacts URI: 'file:///home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/mlruns/614094180776244277/35fcfc65b87d4f9389e1a6fa5ae53f88/artifacts'


In [9]:
mlflow.search_experiments()

Traceback (most recent call last):
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 329, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 427, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 1373, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/store/tracking/file_store.py", line 1366, in _read_helper
    result = read_yaml(root, file_name)
  File "/home/pastor/projects/mlops-zoomcamp/.venv/lib/python3.13/site-packages/mlflow/utils/file_utils.py", line 310, in read_yaml
    raise 

[<Experiment: artifact_location='file:///home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/mlruns/614094180776244277', creation_time=1748208853232, experiment_id='614094180776244277', last_update_time=1748208853232, lifecycle_stage='active', name='my-experiment-1', tags={}>]

In [10]:
from mlflow.tracking import MlflowClient


client = MlflowClient()

In [16]:
from mlflow.exceptions import MlflowException

try:
    print(client.search_registered_models())
except MlflowException:
    print("It's not possible to access the model registry :(")

[]


## Scenario 2: server local

In [1]:
import mlflow

mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [2]:
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'http://127.0.0.1:5000'


In [3]:
mlflow.search_experiments()

[<Experiment: artifact_location='/home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/artifacts_local/0', creation_time=1748225585457, experiment_id='0', last_update_time=1748225585457, lifecycle_stage='active', name='Default', tags={}>]

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

mlflow.set_experiment("my-experiment-1")

with mlflow.start_run():

    X, y = load_iris(return_X_y=True)

    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))

    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

2025/05/26 02:19:47 INFO mlflow.tracking.fluent: Experiment with name 'my-experiment-1' does not exist. Creating a new experiment.


default artifacts URI: '/home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/artifacts_local/1/e18eb4175e18465b84b8638f4c1e9b69/artifacts'
🏃 View run youthful-auk-57 at: http://127.0.0.1:5000/#/experiments/1/runs/e18eb4175e18465b84b8638f4c1e9b69
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1


In [5]:
mlflow.search_experiments()

[<Experiment: artifact_location='/home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/artifacts_local/1', creation_time=1748225987526, experiment_id='1', last_update_time=1748225987526, lifecycle_stage='active', name='my-experiment-1', tags={}>,
 <Experiment: artifact_location='/home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/artifacts_local/0', creation_time=1748225585457, experiment_id='0', last_update_time=1748225585457, lifecycle_stage='active', name='Default', tags={}>]

## Interacting with model registry

In [6]:
from mlflow.tracking import MlflowClient
client = MlflowClient()

In [7]:
client.search_registered_models()

[]

In [23]:
run_id = client.search_runs(experiment_ids='1')[0].info.run_id
mlflow.register_model(
    model_uri=f"runs:/{run_id}/models",
    name='iris-classifier'
)

Successfully registered model 'iris-classifier'.
2025/05/26 02:27:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-classifier, version 1
Created version '1' of model 'iris-classifier'.


<ModelVersion: aliases=[], creation_timestamp=1748226434774, current_stage='None', description='', last_updated_timestamp=1748226434774, name='iris-classifier', run_id='e18eb4175e18465b84b8638f4c1e9b69', run_link='', source='/home/pastor/projects/mlops-zoomcamp/02_experimental_tracking/artifacts_local/1/e18eb4175e18465b84b8638f4c1e9b69/artifacts/models', status='READY', status_message=None, tags={}, user_id='', version='1'>