In [1]:
!python3 -V

Python 3.10.4


In [2]:
!pwd

/home/cinnecta/Documents/cursos/mlops-zoomcamp/02-experiment-tracking


In [3]:
import pandas as pd

In [4]:
import pickle

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error

In [7]:
import mlflow

# Tracking backend and experiment name used by every run in this notebook.
TRACKING_URI = "sqlite:///mlflow.db"
EXPERIMENT_NAME = "nyc-taxi-experiment"

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the last expression so the selected experiment is displayed.
mlflow.set_experiment(EXPERIMENT_NAME)

2022/05/23 08:56:35 INFO mlflow.tracking.fluent: Experiment with name 'nyc-taxi-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [8]:
def read_dataframe(filename):
    """Load a trip-record parquet file and prepare it for modelling.

    Adds a ``duration`` column (drop-off minus pick-up, in minutes), keeps
    only trips lasting between 1 and 60 minutes inclusive, and converts the
    pickup/drop-off location IDs to strings.

    Parameters
    ----------
    filename : str or path-like
        Parquet file containing ``lpep_pickup_datetime``,
        ``lpep_dropoff_datetime``, ``PULocationID`` and ``DOLocationID``.

    Returns
    -------
    pandas.DataFrame
        The filtered frame; the original row index is preserved.
    """
    df = pd.read_parquet(filename)

    # Vectorized timedelta -> minutes conversion (no per-row Python lambda).
    elapsed = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df['duration'] = elapsed.dt.total_seconds() / 60

    # Take an explicit copy of the filtered rows so the column assignment
    # below writes to an independent frame instead of a slice of `df`
    # (avoids pandas' chained-assignment warning).
    df = df[df.duration.between(1, 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [9]:
# January 2021 is the training month, February 2021 the validation month.
train_path = './data/green_tripdata_2021-01.parquet'
val_path = './data/green_tripdata_2021-02.parquet'

df_train, df_val = (read_dataframe(path) for path in (train_path, val_path))

In [10]:
# Row counts of the training and validation frames.
df_train.shape[0], df_val.shape[0]

(73908, 61921)

In [11]:
# Combine pickup and drop-off location IDs into one "<pickup>_<dropoff>"
# feature, applied identically to both splits.
for frame in (df_train, df_val):
    frame['PU_DO'] = frame['PULocationID'] + '_' + frame['DOLocationID']

In [12]:
# The combined pickup/drop-off pair is the only categorical feature;
# trip distance is the only numerical one.
categorical = ['PU_DO']
numerical = ['trip_distance']
feature_columns = categorical + numerical

dv = DictVectorizer()

# One dict per row, for each split.
train_dicts = df_train[feature_columns].to_dict(orient='records')
val_dicts = df_val[feature_columns].to_dict(orient='records')

# Fit the vectorizer on the training records only, then reuse the fitted
# mapping for the validation records.
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [13]:
# Regression target: the `duration` column of each split, as raw arrays.
target = 'duration'
y_train, y_val = (frame[target].values for frame in (df_train, df_val))

In [14]:
# Baseline: plain linear regression on the vectorized features.
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

# RMSE computed as sqrt(MSE). The `squared=False` shortcut was deprecated in
# scikit-learn 1.4 and later removed, so this form runs on both old and new
# releases. Kept as the last expression so the value is displayed.
mean_squared_error(y_val, y_pred) ** 0.5

7.758715208537182

In [15]:
import os

# Create the output directory first so a fresh checkout (no `models/` folder)
# does not fail with FileNotFoundError.
os.makedirs('models', exist_ok=True)

# Store the fitted vectorizer together with the model: both are needed to
# turn raw records into predictions later.
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [16]:
# Train a Lasso model inside a single MLflow run, recording the data
# provenance, the hyperparameter and the validation RMSE.
with mlflow.start_run():

    # NOTE(review): the tag value carries a trailing space; left unchanged
    # here — confirm whether it is intentional.
    mlflow.set_tag("developer", "lucas ")

    mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.parquet")
    mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.parquet")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    # sqrt(MSE) instead of `squared=False`: that keyword was deprecated in
    # scikit-learn 1.4 and later removed; this form works on all versions.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

In [17]:
import xgboost as xgb



In [18]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [19]:
# Wrap each split's feature matrix and labels in an XGBoost DMatrix.
train, valid = (
    xgb.DMatrix(features, label=labels)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

In [20]:
def objective(params):
    """Train one XGBoost booster for a hyperopt trial and log it to MLflow.

    Uses the notebook-level ``train`` / ``valid`` DMatrix objects and the
    ``y_val`` array.

    Parameters
    ----------
    params : dict
        Hyperparameters for this trial; logged to MLflow and passed to
        ``xgb.train`` unchanged.

    Returns
    -------
    dict
        ``{'loss': <validation RMSE>, 'status': STATUS_OK}``, the result
        format consumed by ``hyperopt.fmin``.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            # Stop once the validation metric has not improved for 50 rounds.
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        # sqrt(MSE) instead of `squared=False`: that keyword was deprecated
        # in scikit-learn 1.4 and later removed.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [None]:
# Hyperparameter search space. `hp.loguniform(label, low, high)` samples
# exp(uniform(low, high)), so the bounds below are exponents.
search_space = {
    # quniform yields floats; scope.int casts to the integer XGBoost expects.
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    # 'reg:linear' is a deprecated objective name; 'reg:squarederror' is its
    # replacement and selects the same squared-error loss.
    'objective': 'reg:squarederror',
    'seed': 42
}

# Run the TPE search; each evaluation calls `objective`, which trains a
# booster and logs one MLflow run.
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials()
)

[0]	validation-rmse:7.67819                                                                                                                                      
[1]	validation-rmse:6.76027                                                                                                                                      
[2]	validation-rmse:6.66701                                                                                                                                      
[3]	validation-rmse:6.63781                                                                                                                                      
[4]	validation-rmse:6.62730                                                                                                                                      
[5]	validation-rmse:6.61968                                                                                                                                      
[6]	validation-rmse:6.61640 

[49]	validation-rmse:6.56147                                                                                                                                     
[50]	validation-rmse:6.56005                                                                                                                                     
[51]	validation-rmse:6.55937                                                                                                                                     
[52]	validation-rmse:6.55955                                                                                                                                     
[53]	validation-rmse:6.56160                                                                                                                                     
[54]	validation-rmse:6.56049                                                                                                                                     
[55]	validation-rmse:6.56203

[99]	validation-rmse:6.58318                                                                                                                                     
[100]	validation-rmse:6.58479                                                                                                                                    
[0]	validation-rmse:15.94719                                                                                                                                     
[1]	validation-rmse:12.45802                                                                                                                                     
[2]	validation-rmse:10.20288                                                                                                                                     
[3]	validation-rmse:8.79797                                                                                                                                      
[4]	validation-rmse:7.94146 

[47]	validation-rmse:6.54304                                                                                                                                     
[48]	validation-rmse:6.54226                                                                                                                                     
[49]	validation-rmse:6.54215                                                                                                                                     
[50]	validation-rmse:6.54222                                                                                                                                     
[51]	validation-rmse:6.54179                                                                                                                                     
[52]	validation-rmse:6.54189                                                                                                                                     
[53]	validation-rmse:6.54206

[97]	validation-rmse:6.54130                                                                                                                                     
[98]	validation-rmse:6.54142                                                                                                                                     
[99]	validation-rmse:6.54170                                                                                                                                     
[100]	validation-rmse:6.54252                                                                                                                                    
[101]	validation-rmse:6.54286                                                                                                                                    
[102]	validation-rmse:6.54275                                                                                                                                    
[103]	validation-rmse:6.5420

[16]	validation-rmse:6.59424                                                                                                                                     
[17]	validation-rmse:6.57871                                                                                                                                     
[18]	validation-rmse:6.56839                                                                                                                                     
[19]	validation-rmse:6.55850                                                                                                                                     
[20]	validation-rmse:6.55058                                                                                                                                     
[21]	validation-rmse:6.54311                                                                                                                                     
[22]	validation-rmse:6.53704

[66]	validation-rmse:6.47976                                                                                                                                     
[67]	validation-rmse:6.47886                                                                                                                                     
[68]	validation-rmse:6.47875                                                                                                                                     
[69]	validation-rmse:6.47832                                                                                                                                     
[70]	validation-rmse:6.47794                                                                                                                                     
[71]	validation-rmse:6.47653                                                                                                                                     
[72]	validation-rmse:6.47573

[116]	validation-rmse:6.46360                                                                                                                                    
[117]	validation-rmse:6.46322                                                                                                                                    
[118]	validation-rmse:6.46300                                                                                                                                    
[119]	validation-rmse:6.46308                                                                                                                                    
[120]	validation-rmse:6.46254                                                                                                                                    
[121]	validation-rmse:6.46219                                                                                                                                    
[122]	validation-rmse:6.4621

[166]	validation-rmse:6.45922                                                                                                                                    
[167]	validation-rmse:6.45920                                                                                                                                    
[168]	validation-rmse:6.45884                                                                                                                                    
[169]	validation-rmse:6.45889                                                                                                                                    
[170]	validation-rmse:6.45881                                                                                                                                    
[171]	validation-rmse:6.45863                                                                                                                                    
[172]	validation-rmse:6.4590

[216]	validation-rmse:6.46100                                                                                                                                    
[217]	validation-rmse:6.46086                                                                                                                                    
[218]	validation-rmse:6.46076                                                                                                                                    
[219]	validation-rmse:6.46082                                                                                                                                    
[220]	validation-rmse:6.46053                                                                                                                                    
[0]	validation-rmse:19.97494                                                                                                                                     
[1]	validation-rmse:18.84831

[44]	validation-rmse:6.89173                                                                                                                                     
[45]	validation-rmse:6.87464                                                                                                                                     
[46]	validation-rmse:6.85871                                                                                                                                     
[47]	validation-rmse:6.84431                                                                                                                                     
[48]	validation-rmse:6.83086                                                                                                                                     
[49]	validation-rmse:6.81879                                                                                                                                     
[50]	validation-rmse:6.80759

[94]	validation-rmse:6.67073                                                                                                                                     
[95]	validation-rmse:6.67016                                                                                                                                     
[96]	validation-rmse:6.66976                                                                                                                                     
[97]	validation-rmse:6.66917                                                                                                                                     
[98]	validation-rmse:6.66870                                                                                                                                     
[99]	validation-rmse:6.66791                                                                                                                                     
[100]	validation-rmse:6.6673

[144]	validation-rmse:6.64940                                                                                                                                    
[145]	validation-rmse:6.64920                                                                                                                                    
[146]	validation-rmse:6.64896                                                                                                                                    
[147]	validation-rmse:6.64851                                                                                                                                    
[148]	validation-rmse:6.64832                                                                                                                                    
[149]	validation-rmse:6.64813                                                                                                                                    
[150]	validation-rmse:6.6478

[194]	validation-rmse:6.63644                                                                                                                                    
[195]	validation-rmse:6.63630                                                                                                                                    
[196]	validation-rmse:6.63614                                                                                                                                    
[197]	validation-rmse:6.63594                                                                                                                                    
[198]	validation-rmse:6.63578                                                                                                                                    
[199]	validation-rmse:6.63522                                                                                                                                    
[200]	validation-rmse:6.6351

[244]	validation-rmse:6.62640                                                                                                                                    
[245]	validation-rmse:6.62621                                                                                                                                    
[246]	validation-rmse:6.62596                                                                                                                                    
[247]	validation-rmse:6.62590                                                                                                                                    
[248]	validation-rmse:6.62575                                                                                                                                    
[249]	validation-rmse:6.62548                                                                                                                                    
[250]	validation-rmse:6.6254

[294]	validation-rmse:6.61786                                                                                                                                    
[295]	validation-rmse:6.61768                                                                                                                                    
[296]	validation-rmse:6.61760                                                                                                                                    
[297]	validation-rmse:6.61751                                                                                                                                    
[298]	validation-rmse:6.61732                                                                                                                                    
[299]	validation-rmse:6.61724                                                                                                                                    
[300]	validation-rmse:6.6170

[344]	validation-rmse:6.61120                                                                                                                                    
[345]	validation-rmse:6.61116                                                                                                                                    
[346]	validation-rmse:6.61099                                                                                                                                    
[347]	validation-rmse:6.61095                                                                                                                                    
[348]	validation-rmse:6.61077                                                                                                                                    
[349]	validation-rmse:6.61060                                                                                                                                    
[350]	validation-rmse:6.6104

[394]	validation-rmse:6.60563                                                                                                                                    
[395]	validation-rmse:6.60552                                                                                                                                    
[396]	validation-rmse:6.60546                                                                                                                                    
[397]	validation-rmse:6.60527                                                                                                                                    
[398]	validation-rmse:6.60513                                                                                                                                    
[399]	validation-rmse:6.60501                                                                                                                                    
[400]	validation-rmse:6.6049

[444]	validation-rmse:6.60027                                                                                                                                    
[445]	validation-rmse:6.60014                                                                                                                                    
[446]	validation-rmse:6.60010                                                                                                                                    
[447]	validation-rmse:6.60004                                                                                                                                    
[448]	validation-rmse:6.59992                                                                                                                                    
[449]	validation-rmse:6.59987                                                                                                                                    
[450]	validation-rmse:6.5997

[494]	validation-rmse:6.59606                                                                                                                                    
[495]	validation-rmse:6.59599                                                                                                                                    
[496]	validation-rmse:6.59594                                                                                                                                    
[497]	validation-rmse:6.59583                                                                                                                                    
[498]	validation-rmse:6.59580                                                                                                                                    
[499]	validation-rmse:6.59577                                                                                                                                    
[500]	validation-rmse:6.5956

[544]	validation-rmse:6.59229                                                                                                                                    
[545]	validation-rmse:6.59227                                                                                                                                    
[546]	validation-rmse:6.59218                                                                                                                                    
[547]	validation-rmse:6.59213                                                                                                                                    
[548]	validation-rmse:6.59203                                                                                                                                    
[549]	validation-rmse:6.59188                                                                                                                                    
[550]	validation-rmse:6.5917

[594]	validation-rmse:6.58857                                                                                                                                    
[595]	validation-rmse:6.58829                                                                                                                                    
[596]	validation-rmse:6.58827                                                                                                                                    
[597]	validation-rmse:6.58829                                                                                                                                    
[598]	validation-rmse:6.58826                                                                                                                                    
[599]	validation-rmse:6.58820                                                                                                                                    
[600]	validation-rmse:6.5881

[644]	validation-rmse:6.58554                                                                                                                                    
[645]	validation-rmse:6.58545                                                                                                                                    
[646]	validation-rmse:6.58541                                                                                                                                    
[647]	validation-rmse:6.58530                                                                                                                                    
[648]	validation-rmse:6.58527                                                                                                                                    
[649]	validation-rmse:6.58518                                                                                                                                    
[650]	validation-rmse:6.5851

[694]	validation-rmse:6.58284                                                                                                                                    
[695]	validation-rmse:6.58275                                                                                                                                    
[696]	validation-rmse:6.58264                                                                                                                                    
[697]	validation-rmse:6.58263                                                                                                                                    
[698]	validation-rmse:6.58249                                                                                                                                    
[699]	validation-rmse:6.58246                                                                                                                                    
[700]	validation-rmse:6.5824

[744]	validation-rmse:6.58022                                                                                                                                    
[745]	validation-rmse:6.58016                                                                                                                                    
[746]	validation-rmse:6.58012                                                                                                                                    
[747]	validation-rmse:6.58010                                                                                                                                    
[748]	validation-rmse:6.58007                                                                                                                                    
[749]	validation-rmse:6.58005                                                                                                                                    
[750]	validation-rmse:6.5800

[794]	validation-rmse:6.57796                                                                                                                                    
[795]	validation-rmse:6.57793                                                                                                                                    
[796]	validation-rmse:6.57786                                                                                                                                    
[797]	validation-rmse:6.57782                                                                                                                                    
[798]	validation-rmse:6.57778                                                                                                                                    
[799]	validation-rmse:6.57775                                                                                                                                    
[800]	validation-rmse:6.5777

[844]	validation-rmse:6.57585                                                                                                                                    
[845]	validation-rmse:6.57580                                                                                                                                    
[846]	validation-rmse:6.57576                                                                                                                                    
[847]	validation-rmse:6.57567                                                                                                                                    
[848]	validation-rmse:6.57576                                                                                                                                    
[849]	validation-rmse:6.57568                                                                                                                                    
[850]	validation-rmse:6.5756

[894]	validation-rmse:6.57419                                                                                                                                    
[895]	validation-rmse:6.57414                                                                                                                                    
[896]	validation-rmse:6.57406                                                                                                                                    
[897]	validation-rmse:6.57398                                                                                                                                    
[898]	validation-rmse:6.57395                                                                                                                                    
[899]	validation-rmse:6.57388                                                                                                                                    
[900]	validation-rmse:6.5738

[944]	validation-rmse:6.57233                                                                                                                                    
[945]	validation-rmse:6.57234                                                                                                                                    
[946]	validation-rmse:6.57234                                                                                                                                    
[947]	validation-rmse:6.57232                                                                                                                                    
[948]	validation-rmse:6.57230                                                                                                                                    
[949]	validation-rmse:6.57233                                                                                                                                    
[950]	validation-rmse:6.5723

[994]	validation-rmse:6.57053                                                                                                                                    
[995]	validation-rmse:6.57044                                                                                                                                    
[996]	validation-rmse:6.57041                                                                                                                                    
[997]	validation-rmse:6.57040                                                                                                                                    
[998]	validation-rmse:6.57038                                                                                                                                    
[999]	validation-rmse:6.57040                                                                                                                                    
[0]	validation-rmse:19.79869

[43]	validation-rmse:6.84439                                                                                                                                     
[44]	validation-rmse:6.83229                                                                                                                                     
[45]	validation-rmse:6.82171                                                                                                                                     
[46]	validation-rmse:6.81173                                                                                                                                     
[47]	validation-rmse:6.80158                                                                                                                                     
[48]	validation-rmse:6.79339                                                                                                                                     
[49]	validation-rmse:6.78583

[93]	validation-rmse:6.69283                                                                                                                                     
[94]	validation-rmse:6.69258                                                                                                                                     
[95]	validation-rmse:6.69224                                                                                                                                     
[96]	validation-rmse:6.69205                                                                                                                                     
[97]	validation-rmse:6.69169                                                                                                                                     
[98]	validation-rmse:6.69147                                                                                                                                     
[99]	validation-rmse:6.69106

[143]	validation-rmse:6.68446                                                                                                                                    
[144]	validation-rmse:6.68424                                                                                                                                    
[145]	validation-rmse:6.68425                                                                                                                                    
[146]	validation-rmse:6.68418                                                                                                                                    
[147]	validation-rmse:6.68406                                                                                                                                    
[148]	validation-rmse:6.68397                                                                                                                                    
[149]	validation-rmse:6.6838

[193]	validation-rmse:6.68130                                                                                                                                    
[194]	validation-rmse:6.68115                                                                                                                                    
[195]	validation-rmse:6.68109                                                                                                                                    
[196]	validation-rmse:6.68113                                                                                                                                    
[197]	validation-rmse:6.68096                                                                                                                                    
[198]	validation-rmse:6.68099                                                                                                                                    
[199]	validation-rmse:6.6810

[243]	validation-rmse:6.67844                                                                                                                                    
[244]	validation-rmse:6.67847                                                                                                                                    
[245]	validation-rmse:6.67839                                                                                                                                    
[246]	validation-rmse:6.67836                                                                                                                                    
[247]	validation-rmse:6.67832                                                                                                                                    
[248]	validation-rmse:6.67827                                                                                                                                    
[249]	validation-rmse:6.6781

[293]	validation-rmse:6.67648                                                                                                                                    
[294]	validation-rmse:6.67642                                                                                                                                    
[295]	validation-rmse:6.67634                                                                                                                                    
[296]	validation-rmse:6.67633                                                                                                                                    
[297]	validation-rmse:6.67628                                                                                                                                    
[298]	validation-rmse:6.67633                                                                                                                                    
[299]	validation-rmse:6.6763

[343]	validation-rmse:6.67510                                                                                                                                    
[344]	validation-rmse:6.67514                                                                                                                                    
[345]	validation-rmse:6.67507                                                                                                                                    
[346]	validation-rmse:6.67512                                                                                                                                    
[347]	validation-rmse:6.67502                                                                                                                                    
[348]	validation-rmse:6.67499                                                                                                                                    
[349]	validation-rmse:6.6748

In [None]:
# Turn on MLflow's XGBoost autologging so that subsequent XGBoost training
# calls are recorded by MLflow without explicit logging statements.
mlflow.xgboost.autolog()

In [None]:
with mlflow.start_run():
    # Train one XGBoost regressor with a fixed parameter set inside a single
    # MLflow run, then log its validation RMSE explicitly.

    # Wrap the feature matrices and targets in XGBoost's DMatrix container.
    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    # NOTE(review): presumably the best values from an earlier hyperparameter
    # search — confirm against the tracked runs before reusing them.
    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the
        # current name for the same squared-error regression objective.
        'objective': 'reg:squarederror',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    # Boost for at most 1000 rounds, stopping early once the validation
    # metric has not improved for 50 consecutive rounds.
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    # Take the square root of the MSE ourselves instead of passing
    # `squared=False`, which newer scikit-learn releases no longer accept.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)