In [2]:
# data manipulation and storage
import pandas as pd
import numpy as np

# plotting and graphs
import seaborn as sns
import matplotlib.pyplot as plt

# data preprocessing
# from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# models
# from sklearn.linear_model import LinearRegression
# from sklearn.linear_model import Lasso
# from sklearn.linear_model import Ridge
# import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor

# model performance metrics
from sklearn.metrics import mean_squared_error

# saving model to file
import pickle

# mlflow for experiment tracking
import mlflow

# hyper-parameter optimization
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

# sklearn pipeline creation
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# misc utilities
import copy

# intel sklearn optimization library
from sklearnex import patch_sklearn
patch_sklearn()



Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [None]:
# train_path = 'data/green_tripdata_2021-01.parquet'
# val_path = 'data/green_tripdata_2021-01.parquet'
# df_train = pd.read_parquet(train_path)
# df_train

In [3]:
def cleaned_train_and_target(df,clean=True):
    
    # create concatenated categorical feature
    df['PU_DO_pair'] = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)                
    # create target feature
    df['duration'] = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = df['duration'].apply(lambda td: td.total_seconds()/60)

    if clean == True:
        # filter out rows based on various conditions
        df = df[(df['duration'] >= 1) & (df['duration'] <= 60)]
        df = df[(df['trip_distance'] > 1)&(df['trip_distance'] < 25)]
        df = df[(df['total_amount'] > 1)&(df['total_amount'] < 150)]
        df = df[df['passenger_count'] > 0]  
 
    y = df['duration']
    X = df[['PU_DO_pair','trip_distance','total_amount']]
    return X,y


In [4]:
def initialize_regression_model(params,type='gradientbooster'):

    categorical = ['PU_DO_pair']
    numerical = ['trip_distance','fare_amount']

    numeric_pipeline = Pipeline(steps=[
        ('impute', SimpleImputer(strategy='mean')),
        ('scale', StandardScaler())
    ])
    categorical_pipeline = Pipeline(steps=[
        ('impute', SimpleImputer(strategy='most_frequent')),
        ('one-hot', OneHotEncoder(handle_unknown='ignore', sparse=False))
    ])


    preprocessor_pipeline = ColumnTransformer(transformers=[
        ('numeric', numeric_pipeline, numerical),
        ('categoric', categorical_pipeline, categorical)
    ])

    if type == 'gradientbooster':
        regressor = GradientBoostingRegressor(**params)
    else:
        regressor = None

    regression_model = Pipeline(steps=[
        ('preprocess', preprocessor_pipeline),
        ('model', regressor)
    ])

    return regression_model


In [5]:
def hyperparameter_optimizer(
    X_train,
    y_train,
    X_val,
    y_val
):

    # define hyper-parameter search space
    search_space = {
        # 'n_estimators':hp.choice('n_estimators',np.arange(10,101,1)),
        'learning_rate':hp.loguniform('learning_rate',-3,0),
        # 'min_samples_split':hp.loguniform('min_child_weight',-4,0),
        # 'max_depth':scope.int(hp.quniform('max_depth',5,100,5)),        
        'random_state':42
    }

    # define objective function
    def objective(params):
        
        with mlflow.start_run():
            mlflow.set_tag('model','gradientboostingregressor')
            # mlflow.log_params(params)
            mlflow.sklearn.autolog()
            pipe = initialize_regression_model(params=params,type='gradientbooster')
            pipe.fit(X_train,y_train)
            y_pred = pipe.predict(X_val)
            rmse = mean_squared_error(y_val,y_pred,squared=False)
            mlflow.log_metric('validation_rmse',rmse)
            # mlflow.log_artifact(scaler_path,artifact_path="preprocessor")
            # mlflow.log_artifact(vectorizer_path,artifact_path="preprocessor")
            # mlflow.xgboost.log_model(xgb_model,artifact_path="models_mlflow")
        

        return {'loss':rmse,'status':STATUS_OK}


    # Perform hyper-parameter optimization
    best_result = fmin(
        fn = objective,
        space = search_space,
        algo = tpe.suggest,
        max_evals=50,
        trials = Trials()
    )   

    return 

In [6]:
def main(
    train_path = 'data/green_tripdata_2021-01.parquet',
    val_path = 'data/green_tripdata_2021-01.parquet',
    tracking_uri = 'sqlite:///mlflow.db',
    experiment = 'gradient-booster-experiment'
):
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(experiment)
    train_path = 'data/green_tripdata_2021-01.parquet'
    val_path = 'data/green_tripdata_2021-01.parquet'
    df_train = pd.read_parquet(train_path)
    df_val = pd.read_parquet(val_path)
    X_train,y_train = cleaned_train_and_target(df_train,clean=True)
    X_val, y_val = cleaned_train_and_target(df_val,clean=True)
    hyperparameter_optimizer(X_train,y_train,X_val,y_val)
    # train_best_model(train,valid,y_val,dv,scaler)

main()

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`




  2%|▏         | 1/50 [04:54<4:00:34, 294.58s/trial, best loss: 2.6771038832759655]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



  4%|▍         | 2/50 [09:56<3:59:10, 298.97s/trial, best loss: 2.6771038832759655]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



  6%|▌         | 3/50 [15:29<4:06:18, 314.44s/trial, best loss: 2.543704328834773] 

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



  8%|▊         | 4/50 [21:09<4:08:54, 324.67s/trial, best loss: 2.247812054505768]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 10%|█         | 5/50 [26:48<4:07:15, 329.68s/trial, best loss: 2.247812054505768]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 12%|█▏        | 6/50 [32:40<4:07:15, 337.18s/trial, best loss: 2.247812054505768]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 14%|█▍        | 7/50 [38:39<4:06:55, 344.54s/trial, best loss: 2.0408301567742004]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 16%|█▌        | 8/50 [44:32<4:03:01, 347.18s/trial, best loss: 2.0408301567742004]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 18%|█▊        | 9/50 [50:34<4:00:21, 351.75s/trial, best loss: 2.0408301567742004]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 20%|██        | 10/50 [56:05<3:50:19, 345.50s/trial, best loss: 2.0408301567742004]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 22%|██▏       | 11/50 [1:01:53<3:45:02, 346.22s/trial, best loss: 2.0408301567742004]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 24%|██▍       | 12/50 [1:07:43<3:39:57, 347.30s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 26%|██▌       | 13/50 [1:13:27<3:33:27, 346.16s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 28%|██▊       | 14/50 [1:19:19<3:28:52, 348.14s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 30%|███       | 15/50 [1:25:01<3:21:59, 346.28s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 32%|███▏      | 16/50 [1:30:39<3:14:40, 343.56s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 34%|███▍      | 17/50 [1:36:31<3:10:23, 346.16s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 36%|███▌      | 18/50 [1:42:17<3:04:36, 346.13s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 38%|███▊      | 19/50 [1:47:50<2:56:47, 342.18s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 40%|████      | 20/50 [1:53:48<2:53:33, 347.10s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 42%|████▏     | 21/50 [1:59:43<2:48:50, 349.33s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 44%|████▍     | 22/50 [2:05:32<2:42:57, 349.18s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 46%|████▌     | 23/50 [2:11:40<2:39:39, 354.81s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 48%|████▊     | 24/50 [2:17:46<2:35:12, 358.16s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 50%|█████     | 25/50 [2:23:53<2:30:21, 360.85s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 52%|█████▏    | 26/50 [2:29:56<2:24:36, 361.52s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 54%|█████▍    | 27/50 [2:35:19<2:14:09, 349.97s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 56%|█████▌    | 28/50 [2:40:12<2:02:04, 332.92s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 58%|█████▊    | 29/50 [2:45:09<1:52:46, 322.22s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 60%|██████    | 30/50 [2:50:02<1:44:30, 313.51s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 62%|██████▏   | 31/50 [2:54:25<1:34:27, 298.27s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 64%|██████▍   | 32/50 [2:59:01<1:27:28, 291.60s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 66%|██████▌   | 33/50 [3:03:56<1:22:53, 292.56s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 68%|██████▊   | 34/50 [3:08:51<1:18:13, 293.35s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 70%|███████   | 35/50 [3:13:47<1:13:30, 294.06s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 72%|███████▏  | 36/50 [3:18:43<1:08:45, 294.70s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 74%|███████▍  | 37/50 [3:23:31<1:03:25, 292.75s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 76%|███████▌  | 38/50 [3:27:58<57:00, 285.02s/trial, best loss: 1.9062409545629415]  

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 78%|███████▊  | 39/50 [3:32:51<52:38, 287.18s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 80%|████████  | 40/50 [3:37:46<48:18, 289.80s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 82%|████████▏ | 41/50 [3:42:38<43:33, 290.34s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 84%|████████▍ | 42/50 [3:47:37<39:04, 293.05s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 86%|████████▌ | 43/50 [3:52:34<34:17, 293.98s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 88%|████████▊ | 44/50 [3:57:23<29:16, 292.69s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 90%|█████████ | 45/50 [4:02:02<24:02, 288.47s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 92%|█████████▏| 46/50 [4:07:03<19:29, 292.26s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 94%|█████████▍| 47/50 [4:11:30<14:14, 284.74s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 96%|█████████▌| 48/50 [4:16:25<09:35, 287.65s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



 98%|█████████▊| 49/50 [4:21:20<04:49, 289.86s/trial, best loss: 1.9062409545629415]

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
             ...`

                                 Pipeline(steps=[('impute', SimpleImputer()),
                                                 ('scale', StandardScaler())]),
                                 ['trip_distance', 'fare_amount']),
                                ('categoric',
                                 Pipeline(steps=[('impute',
                                                  SimpleImputer(strategy='most_frequent')),
                             ...`



100%|██████████| 50/50 [4:26:18<00:00, 319.57s/trial, best loss: 1.9062409545629415]
