In [15]:
#Import Relevant Libraries
from sklearn import set_config

# Show estimators and pipelines as diagrams when they are displayed as cell output.
set_config(display="diagram")

#Pipeline
from sklearn.pipeline import Pipeline

#Pre-processing
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from TaxiFareModel.encoders import TimeFeaturesEncoder, DistanceTransformer

#Estimators
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor

#Grid Search
from sklearn.model_selection import GridSearchCV

In [16]:
# Candidate regressors, keyed by display name. Each one is created with its
# default constructor arguments; the same keys are used to look up the
# matching search grid.
estimators = dict(
    [
        ("Linear Regression", LinearRegression()),
        ("KNN", KNeighborsRegressor()),
        ("SVR", SVR()),
        ("Adaboost", AdaBoostRegressor()),
    ]
)

In [None]:
 hyperparameters = {

        "KNN" : {"estimator__n_neighbors" : [2, 5, 10],
                     "estimator__weights" : ["uniform", "distance"],
                   "estimator__leaf_size" : [15, 30, 45]
                   },

        "SVR" : {"estimator__kernel" : ["linear", "poly", "rbf"],
                      "estimator__C" : [0.01, 0.1, 0.5, 1]
                 },
        "Adaboost" : {"estimator__learning_rate" : [1, 5, 10],
                      "estimator__loss" : ["linear", "square", "exponential"]}
    }

In [20]:
# Build the preprocessing + model pipeline as notebook-level variables.

# Distance branch: DistanceTransformer followed by standard scaling.
dist_pipe = Pipeline(steps=[
    ("dist_trans", DistanceTransformer()),
    ("stdscaler", StandardScaler()),
])

# Time branch: TimeFeaturesEncoder on "pickup_datetime", then one-hot encoding.
# Categories unseen during fit are ignored at transform time instead of raising.
time_pipe = Pipeline(steps=[
    ("time_enc", TimeFeaturesEncoder("pickup_datetime")),
    ("ohe", OneHotEncoder(handle_unknown="ignore")),
])

# Route each group of columns to its branch; every other column is dropped.
preproc_pipe = ColumnTransformer(
    transformers=[
        (
            "distance",
            dist_pipe,
            ["pickup_latitude", "pickup_longitude", "dropoff_latitude", "dropoff_longitude"],
        ),
        ("time", time_pipe, ["pickup_datetime"]),
    ],
    remainder="drop",
)

# Full pipeline. The final step is named "estimator", which is why the search
# grids use the "estimator__" parameter prefix.
pipeline = Pipeline(steps=[
    ("preproc", preproc_pipe),
    ("estimator", estimators["KNN"]),
])

# pipeline.get_params().keys()

# 5-fold cross-validated grid search over the KNN grid, scored by negative
# mean squared error (higher is better, i.e. closer to zero).
grid_search = GridSearchCV(
    pipeline,
    param_grid=hyperparameters["KNN"],
    cv=5,
    scoring="neg_mean_squared_error",
)

# `best_estimator_` is an attribute that GridSearchCV sets only after `.fit(...)`.
# The previous line used it as a bare name (NameError) and would also have
# overwritten the search object.
# TODO: once the training data is loaded, fit the search and read the result:
#     grid_search.fit(X_train, y_train)
#     best_model = grid_search.best_estimator_
grid_search

dict_keys(['memory', 'steps', 'verbose', 'preproc', 'estimator', 'preproc__n_jobs', 'preproc__remainder', 'preproc__sparse_threshold', 'preproc__transformer_weights', 'preproc__transformers', 'preproc__verbose', 'preproc__distance', 'preproc__time', 'preproc__distance__memory', 'preproc__distance__steps', 'preproc__distance__verbose', 'preproc__distance__dist_trans', 'preproc__distance__stdscaler', 'preproc__distance__dist_trans__end_lat', 'preproc__distance__dist_trans__end_lon', 'preproc__distance__dist_trans__start_lat', 'preproc__distance__dist_trans__start_lon', 'preproc__distance__stdscaler__copy', 'preproc__distance__stdscaler__with_mean', 'preproc__distance__stdscaler__with_std', 'preproc__time__memory', 'preproc__time__steps', 'preproc__time__verbose', 'preproc__time__time_enc', 'preproc__time__ohe', 'preproc__time__time_enc__time_column', 'preproc__time__time_enc__time_zone_name', 'preproc__time__ohe__categories', 'preproc__time__ohe__drop', 'preproc__time__ohe__dtype', 'prep