In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Types of models that will be used to test the methods
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
# Scoring metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

In [2]:
training_data = pd.read_csv('./datasets/Train_Data.csv', index_col=0)
testing_data = pd.read_csv('./datasets/Test_Data.csv', index_col=0)
training_data.head()

Unnamed: 0,avg_VendorID_hour,avg_passenger_count_hour,avg_trip_distance_hour,avg_RatecodeID_hour,avg_fare_amount_hour,avg_extra_hour,avg_mta_tax_hour,avg_tip_amount_hour,avg_tolls_amount_hour,avg_improvement_surcharge_hour,...,avg_Brooklyn_DO_isna_hour,avg_Staten_Island_DO_isna_hour,avg_Mins_In_Ride_hour,Year,Month,Day,Hour,Minute,Second,avg_total_amount_hour
3098,1.239437,0.971831,4.546901,0.746479,15.863662,0.788732,0.5,0.536479,0.0,0.3,...,0.0,0.0,13.298592,2020.0,5.0,9.0,3.0,0.0,0.0,18.904366
699,1.6658,1.488947,2.949376,0.990897,11.14459,1.227243,0.5,1.546125,0.0,0.3,...,0.0,0.0,9.815397,2020.0,1.0,30.0,4.0,0.0,0.0,16.287464
1415,1.690328,1.588607,2.415337,0.998443,10.763609,1.228893,0.5,2.022124,0.001377,0.3,...,0.0,0.0,11.948955,2020.0,2.0,29.0,0.0,0.0,0.0,16.463901
3413,1.066667,0.890909,3.840424,0.666667,13.259091,0.924242,0.5,0.813152,0.0,0.3,...,0.0,0.0,10.926788,2020.0,5.0,22.0,6.0,0.0,0.0,17.020424
1754,1.695288,1.598855,2.505821,0.998679,10.019269,1.199031,0.5,1.62277,0.0,0.3,...,0.0,0.0,9.339326,2020.0,3.0,14.0,3.0,0.0,0.0,15.223831


In [3]:
scaled_training_data = pd.read_csv('./datasets/Scaled_Train_Data.csv', index_col=0)
scaled_testing_data = pd.read_csv('./datasets/Scaled_Test_Data.csv', index_col=0)
scaled_training_data.head()

Unnamed: 0,avg_VendorID_hour,avg_passenger_count_hour,avg_trip_distance_hour,avg_RatecodeID_hour,avg_fare_amount_hour,avg_extra_hour,avg_mta_tax_hour,avg_tip_amount_hour,avg_tolls_amount_hour,avg_improvement_surcharge_hour,...,avg_Bronx_DO_isna_hour,avg_Brooklyn_DO_isna_hour,avg_Staten_Island_DO_isna_hour,avg_Mins_In_Ride_hour,Year,Month,Day,Hour,Minute,Second
3098,-1.359815,-1.457616,2.434144,-1.632501,2.503856,-0.838095,0.0,-2.435027,-0.274567,-0.37439,...,0.0,0.0,1.645624,0.0,0.871232,-0.757613,-1.242337,0.0,0.0,1.995106
699,0.691624,0.777944,0.455723,0.487821,0.234047,0.457915,0.0,0.220628,-0.274567,-0.226047,...,0.0,0.0,-0.279444,0.0,-1.466594,1.640723,-1.096978,0.0,0.0,0.527513
1415,0.80964,1.208787,-0.205646,0.553277,0.0508,0.462792,0.0,1.472641,-0.069526,2.243984,...,0.0,0.0,0.899716,0.0,-0.882137,1.526516,-1.678416,0.0,0.0,0.626461
3413,-2.191094,-1.807453,1.559223,-2.32487,1.251093,-0.437598,0.0,-1.7073,-0.274567,-0.421483,...,0.0,0.0,0.334792,0.0,0.871232,0.727071,-0.806258,0.0,0.0,0.938566
1754,0.833508,1.253093,-0.093587,0.555327,-0.307218,0.374535,0.0,0.422226,-0.274567,-0.616919,...,0.0,0.0,-0.542555,0.0,-0.297681,-0.186581,-1.242337,0.0,0.0,-0.068987


In [4]:
training_data = training_data.fillna(0)
testing_data = testing_data.fillna(0)
scaled_training_data = scaled_training_data.fillna(0)
scaled_testing_data = scaled_testing_data.fillna(0)

In [5]:
print(training_data.shape)
testing_data.shape

(3056, 43)


(1311, 43)

In [6]:
print(scaled_training_data.shape)
scaled_testing_data.shape

(3056, 43)


(1311, 43)

In [7]:
# splitting the data into training and testing variables
X_train = training_data.drop(['avg_total_amount_hour'], axis=1)
X_test = testing_data.drop(['avg_total_amount_hour'], axis=1)
y_train = training_data[['avg_total_amount_hour']]
y_test = testing_data[['avg_total_amount_hour']]

In [8]:
# splitting the scaled data into training and testing variables
scaled_X_train = scaled_training_data.drop(['avg_total_amount_hour'], axis=1)
scaled_X_test = scaled_testing_data.drop(['avg_total_amount_hour'], axis=1)
scaled_y_train = scaled_training_data['avg_total_amount_hour']
scaled_y_test = scaled_testing_data['avg_total_amount_hour']

In [9]:
def get_metrics(model, X_train_param, X_test_param, y_train_param, y_test_param):
    model.fit(X_train_param, y_train_param)
    model_train_pred = model.predict(X_train_param)
    model_test_pred = model.predict(X_test_param)
    
    model_train_RMSE = mean_squared_error(y_train_param, model_train_pred)
    model_train_R2 = r2_score(y_train_param, model_train_pred)
    model_test_RMSE = mean_squared_error(y_test_param, model_test_pred)
    model_test_R2 = r2_score(y_test_param, model_test_pred)
    return model, model_train_RMSE, model_test_RMSE, model_train_R2, model_test_R2


In [10]:
linreg = LinearRegression()
lin_metrics = get_metrics(linreg, X_train, X_test, y_train, y_test)
lin_metrics[4]

0.9993729631960804

In [11]:
linreg_parameters = {'RMSE_train': lin_metrics[1], 'RMSE_test': lin_metrics[2], 
                    'r2_train': lin_metrics[3], 'r2_test': lin_metrics[4]}

In [12]:
linreg_coef = {}
for i in range(len(X_train.columns)):
    key = '{}_coef'.format(X_train.columns[i])
    linreg_coef[key] = lin_metrics[0].coef_[0][i]
linreg_coef['Intercept'] = lin_metrics[0].intercept_[0]
linreg_coef

{'avg_VendorID_hour_coef': 2.08203270451262,
 'avg_passenger_count_hour_coef': -0.019953783919202883,
 'avg_trip_distance_hour_coef': 0.016940204061752537,
 'avg_RatecodeID_hour_coef': -0.7839224947271504,
 'avg_fare_amount_hour_coef': 0.9815060039918709,
 'avg_extra_hour_coef': 0.992009532923819,
 'avg_mta_tax_hour_coef': -54215516345.94801,
 'avg_tip_amount_hour_coef': 1.0221030330552017,
 'avg_tolls_amount_hour_coef': 0.859500265946085,
 'avg_improvement_surcharge_hour_coef': 90359194139.0434,
 'avg_congestion_surcharge_hour_coef': 0.6934931563511377,
 'avg_Bronx_PU_hour_coef': -81.88278046859064,
 'avg_Brooklyn_PU_hour_coef': -81.94487817670563,
 'avg_Manhattan_PU_hour_coef': -82.00551266222112,
 'avg_Queens_PU_hour_coef': -81.75373967815037,
 'avg_Staten_Island_PU_hour_coef': -73.36872023074822,
 'avg_Bronx_DO_hour_coef': 12.232494782659625,
 'avg_Brooklyn_DO_hour_coef': 11.581875448201671,
 'avg_Manhattan_DO_hour_coef': 11.411909576174647,
 'avg_Queens_DO_hour_coef': 11.377741484

In [13]:
linreg_scaled = LinearRegression()
scaled_lin_metrics = get_metrics(linreg_scaled, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_lin_metrics[4]

0.9702495248213329

In [14]:
scaled_linreg_parameters = {'RMSE_train': scaled_lin_metrics[1], 'RMSE_test': scaled_lin_metrics[2], 
                    'r2_train': scaled_lin_metrics[3], 'r2_test': scaled_lin_metrics[4]}

In [15]:
scaled_linreg_coef = {}
for i in range(len(scaled_X_train.columns)):
    key = '{}_coef'.format(scaled_X_train.columns[i])
    scaled_linreg_coef[key] = scaled_lin_metrics[0].coef_[i]
scaled_linreg_coef['Intercept'] = scaled_lin_metrics[0].intercept_
scaled_linreg_coef

{'avg_VendorID_hour_coef': -1.1817621128793427,
 'avg_passenger_count_hour_coef': 0.02730243772595442,
 'avg_trip_distance_hour_coef': -0.022607892354132506,
 'avg_RatecodeID_hour_coef': -1.6901672418479503,
 'avg_fare_amount_hour_coef': -6.16262685851149,
 'avg_extra_hour_coef': -0.9855467706171989,
 'avg_mta_tax_hour_coef': -3.723688024592775e-13,
 'avg_tip_amount_hour_coef': -1.092668348019771,
 'avg_tolls_amount_hour_coef': -0.01522685312337028,
 'avg_improvement_surcharge_hour_coef': -0.010378251288141453,
 'avg_congestion_surcharge_hour_coef': 8.664365206521135,
 'avg_Bronx_PU_hour_coef': 14.079480254749098,
 'avg_Brooklyn_PU_hour_coef': 27.773314593850163,
 'avg_Manhattan_PU_hour_coef': 9.918902837979857,
 'avg_Queens_PU_hour_coef': 0.3856657351425195,
 'avg_Staten_Island_PU_hour_coef': -3.038095883911806,
 'avg_Bronx_DO_hour_coef': -3.003306545163294,
 'avg_Brooklyn_DO_hour_coef': -7.47112032514357,
 'avg_Manhattan_DO_hour_coef': -2.8999895185007287,
 'avg_Queens_DO_hour_coef':

# Ridge Regression

In [16]:
ridge = Ridge()
params = {'alpha': [1, 0.1, 0.01, 0.001, 0.0001, 0.00001]}
ridge_cv = GridSearchCV(ridge, param_grid=params, cv=5)
ridge_cv.fit(X_train, y_train)
ridge_cv.best_params_

{'alpha': 0.001}

In [17]:
ridgereg = Ridge(0.0001)
ridge_metrics = get_metrics(ridgereg, X_train, X_test, y_train, y_test)
ridge_metrics[4]

0.9993707198916155

In [18]:
ridge_parameters = {'RMSE_train': ridge_metrics[1], 'RMSE_test': ridge_metrics[2], 
                    'r2_train': ridge_metrics[3], 'r2_test': ridge_metrics[4], 'alpha': 0.1}

In [19]:
ridgereg_coef = {}
for i in range(len(X_train.columns)):
    key = '{}_coef'.format(X_train.columns[i])
    ridgereg_coef[key] = ridge_metrics[0].coef_[0][i]
ridgereg_coef['Intercept'] = ridge_metrics[0].intercept_[0]
ridgereg_coef

{'avg_VendorID_hour_coef': 2.083571334497991,
 'avg_passenger_count_hour_coef': -0.02212278388600009,
 'avg_trip_distance_hour_coef': 0.015256490068861903,
 'avg_RatecodeID_hour_coef': 0.7279384434293956,
 'avg_fare_amount_hour_coef': 0.9815177207845863,
 'avg_extra_hour_coef': 0.9925996954123646,
 'avg_mta_tax_hour_coef': 1.6491170936705435,
 'avg_tip_amount_hour_coef': 1.0232867879259984,
 'avg_tolls_amount_hour_coef': 0.9034263721440626,
 'avg_improvement_surcharge_hour_coef': 0.989470256887346,
 'avg_congestion_surcharge_hour_coef': 0.6895625738946127,
 'avg_Bronx_PU_hour_coef': -2.8240881630924,
 'avg_Brooklyn_PU_hour_coef': -2.933959616938312,
 'avg_Manhattan_PU_hour_coef': -3.0051162050611526,
 'avg_Queens_PU_hour_coef': -2.7241940591155216,
 'avg_Staten_Island_PU_hour_coef': 1.3118190274987929,
 'avg_Bronx_DO_hour_coef': -0.6715047566453838,
 'avg_Brooklyn_DO_hour_coef': -1.3107200042966527,
 'avg_Manhattan_DO_hour_coef': -1.4764676694042034,
 'avg_Queens_DO_hour_coef': -1.5194

In [20]:
scaled_ridgereg = Ridge(0.0001)
scaled_ridge_metrics = get_metrics(scaled_ridgereg, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_ridge_metrics[4]

0.9702499451702038

In [21]:
scaled_ridge_parameters = {'RMSE_train': scaled_ridge_metrics[1], 'RMSE_test': scaled_ridge_metrics[2], 
                    'r2_train': scaled_ridge_metrics[3], 'r2_test': scaled_ridge_metrics[4], 'alpha': 0.1}

In [22]:
scaled_ridgereg_coef = {}
for i in range(len(scaled_X_train.columns)):
    key = '{}_coef'.format(scaled_X_train.columns[i])
    scaled_ridgereg_coef[key] = scaled_ridge_metrics[0].coef_[i]
scaled_ridgereg_coef['Intercept'] = scaled_ridge_metrics[0].intercept_
scaled_ridgereg_coef

{'avg_VendorID_hour_coef': -1.181691444967512,
 'avg_passenger_count_hour_coef': 0.027419786474891454,
 'avg_trip_distance_hour_coef': -0.02259830588792112,
 'avg_RatecodeID_hour_coef': -1.6436734783486415,
 'avg_fare_amount_hour_coef': -6.161826629863194,
 'avg_extra_hour_coef': -0.9854339238721391,
 'avg_mta_tax_hour_coef': 0.0,
 'avg_tip_amount_hour_coef': -1.092631620657205,
 'avg_tolls_amount_hour_coef': -0.015274470697062425,
 'avg_improvement_surcharge_hour_coef': -0.010382339811052833,
 'avg_congestion_surcharge_hour_coef': 6.248762491561888,
 'avg_Bronx_PU_hour_coef': 10.140054928771523,
 'avg_Brooklyn_PU_hour_coef': 20.06514744234096,
 'avg_Manhattan_PU_hour_coef': 7.1474940663628646,
 'avg_Queens_PU_hour_coef': 0.26983612048127636,
 'avg_Staten_Island_PU_hour_coef': 0.38209002204518444,
 'avg_Bronx_DO_hour_coef': 0.5644068788922819,
 'avg_Brooklyn_DO_hour_coef': 1.4736461971607921,
 'avg_Manhattan_DO_hour_coef': 0.5632508149695921,
 'avg_Queens_DO_hour_coef': 0.0433159210986

# Lasso Regression

In [23]:
LassoReg = Lasso(alpha=0.001)
LassoReg.fit(X_train, y_train)
ls_y_pred = LassoReg.predict(X_test)
print(mean_squared_error(y_test, ls_y_pred))
r2_score(y_test, ls_y_pred)

0.007118179059164154


0.9977981959493795

In [24]:
len(X_train.columns)

42

In [25]:
LassoReg.coef_

array([ 5.15966517e-01,  0.00000000e+00,  3.80607742e-02, -1.95219300e-01,
        1.02317015e+00,  9.01762043e-01, -0.00000000e+00,  1.06635784e+00,
        0.00000000e+00, -0.00000000e+00,  5.78319632e-01,  0.00000000e+00,
        0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  5.83194207e-03,
        3.66522225e-05, -9.49719087e-03, -5.44820475e-04, -1.49296726e-03,
        0.00000000e+00,  0.00000000e+00])

In [26]:
lasso = Lasso()
params = {'alpha': [1, 0.1, 0.01, 0.001, 0.0001, 0.00001]}
lasso_cv = GridSearchCV(lasso, param_grid=params, cv=5)
lasso_cv.fit(X_train, y_train)
lasso_cv.best_params_

  positive)
  positive)
  positive)
  positive)
  positive)


{'alpha': 0.0001}

In [27]:
lassoreg = Lasso(alpha=0.001)
lassoreg_metrics = get_metrics(lassoreg, X_train, X_test, y_train, y_test)
lassoreg_metrics

(Lasso(alpha=0.001),
 0.007303280338898098,
 0.007118179059164154,
 0.9977566606636259,
 0.9977981959493795)

In [28]:
lasso_parameters = {'RMSE_train': lassoreg_metrics[1], 'RMSE_test': lassoreg_metrics[2], 
                    'r2_train': lassoreg_metrics[3], 'r2_test': lassoreg_metrics[4], 'alpha': 0.001}

In [29]:
lassoreg_coef = {}
for i in range(len(X_train.columns)):
    key = '{}_coef'.format(X_train.columns[i])
    lassoreg_coef[key] = lassoreg_metrics[0].coef_[i]
lassoreg_coef['Intercept'] = lassoreg_metrics[0].intercept_[0]
lassoreg_coef

{'avg_VendorID_hour_coef': 0.5159665167192508,
 'avg_passenger_count_hour_coef': 0.0,
 'avg_trip_distance_hour_coef': 0.038060774159138715,
 'avg_RatecodeID_hour_coef': -0.19521930031699486,
 'avg_fare_amount_hour_coef': 1.023170154011633,
 'avg_extra_hour_coef': 0.9017620434110457,
 'avg_mta_tax_hour_coef': -0.0,
 'avg_tip_amount_hour_coef': 1.0663578435430408,
 'avg_tolls_amount_hour_coef': 0.0,
 'avg_improvement_surcharge_hour_coef': -0.0,
 'avg_congestion_surcharge_hour_coef': 0.5783196322343793,
 'avg_Bronx_PU_hour_coef': 0.0,
 'avg_Brooklyn_PU_hour_coef': 0.0,
 'avg_Manhattan_PU_hour_coef': -0.0,
 'avg_Queens_PU_hour_coef': -0.0,
 'avg_Staten_Island_PU_hour_coef': 0.0,
 'avg_Bronx_DO_hour_coef': 0.0,
 'avg_Brooklyn_DO_hour_coef': 0.0,
 'avg_Manhattan_DO_hour_coef': -0.0,
 'avg_Queens_DO_hour_coef': -0.0,
 'avg_Staten_Island_DO_hour_coef': 0.0,
 'avg_VendorID_isna_hour_coef': 0.0,
 'avg_passenger_count_isna_hour_coef': 0.0,
 'avg_RatecodeID_isna_hour_coef': 0.0,
 'avg_payment_type

In [30]:
scaled_lassoreg = Lasso(0.001)
scaled_lassoreg_metrics = get_metrics(scaled_lassoreg, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_lassoreg_metrics[4]

  positive)


0.9569582732490871

In [31]:
scaled_lasso_parameters = {'RMSE_train': scaled_lassoreg_metrics[1], 'RMSE_test': scaled_lassoreg_metrics[2], 
                    'r2_train': scaled_lassoreg_metrics[3], 'r2_test': scaled_lassoreg_metrics[4], 'alpha': 0.001}

In [32]:
scaled_lassoreg_coef = {}
for i in range(len(scaled_X_train.columns)):
    key = '{}_coef'.format(scaled_X_train.columns[i])
    scaled_lassoreg_coef[key] = scaled_lassoreg_metrics[0].coef_[i]
scaled_lassoreg_coef['Intercept'] = scaled_lassoreg_metrics[0].intercept_
scaled_lassoreg_coef

{'avg_VendorID_hour_coef': -0.44972622859213546,
 'avg_passenger_count_hour_coef': 0.01999255550319721,
 'avg_trip_distance_hour_coef': -0.04362735570495609,
 'avg_RatecodeID_hour_coef': 1.2369653784540295,
 'avg_fare_amount_hour_coef': -3.3307980417308176,
 'avg_extra_hour_coef': -0.502834237094887,
 'avg_mta_tax_hour_coef': 0.0,
 'avg_tip_amount_hour_coef': -0.4935212557139565,
 'avg_tolls_amount_hour_coef': -0.0043499012136795434,
 'avg_improvement_surcharge_hour_coef': -0.009961028919797573,
 'avg_congestion_surcharge_hour_coef': -0.0,
 'avg_Bronx_PU_hour_coef': -0.09410046871986133,
 'avg_Brooklyn_PU_hour_coef': 0.2041737540701721,
 'avg_Manhattan_PU_hour_coef': -0.02119285836937732,
 'avg_Queens_PU_hour_coef': -0.0009509230764459515,
 'avg_Staten_Island_PU_hour_coef': -0.1816696001150897,
 'avg_Bronx_DO_hour_coef': -0.0,
 'avg_Brooklyn_DO_hour_coef': 0.03223041325578727,
 'avg_Manhattan_DO_hour_coef': -0.0055971092471884,
 'avg_Queens_DO_hour_coef': -0.0,
 'avg_Staten_Island_DO_h

# KNearest Neighbors

In [33]:
knn = KNeighborsRegressor()
params = {'n_neighbors': np.arange(1, 10)}
knn_cv = GridSearchCV(knn, param_grid=params, cv=5)
knn_cv.fit(X_train, y_train)
knn_cv.best_params_

{'n_neighbors': 2}

In [34]:
k_nn = KNeighborsRegressor(n_neighbors=6)
k_nn_metrics = get_metrics(k_nn, X_train, X_test, y_train, y_test)
k_nn_metrics[4]

0.9560954569266636

In [35]:
k_nn_parameters = {'RMSE_train': k_nn_metrics[1], 'RMSE_test': k_nn_metrics[2], 
                    'r2_train': k_nn_metrics[3], 'r2_test': k_nn_metrics[4], 'n_neighbors': 8}

In [36]:
scaled_knn_metrics = get_metrics(k_nn, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_knn_metrics[4]

0.932411758312841

In [37]:
scaled_k_nn_parameters = {'RMSE_train': scaled_knn_metrics[1], 'RMSE_test': scaled_knn_metrics[2], 
                    'r2_train': scaled_knn_metrics[3], 'r2_test': scaled_knn_metrics[4], 'n_neighbors': 8}

# Decision Tree

In [38]:
decisiontree = DecisionTreeRegressor()
params = {'max_depth': np.arange(1, 10)}
decisiontree = GridSearchCV(decisiontree, param_grid=params, cv=5)
decisiontree.fit(X_train, y_train)
decisiontree.best_params_

{'max_depth': 9}

In [39]:
tree = DecisionTreeRegressor(max_depth=8)
tree_metrics = get_metrics(tree, X_train, X_test, y_train, y_test)
tree_metrics[4]

0.9764747797370066

In [40]:
tree_parameters = {'RMSE_train': tree_metrics[1], 'RMSE_test': tree_metrics[2], 
                    'r2_train': tree_metrics[3], 'r2_test': tree_metrics[4], 'max_depth':8}

In [41]:
scaled_tree_metrics = get_metrics(tree, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_tree_metrics[4]

0.9065568540223155

In [42]:
scaled_tree_parameters = {'RMSE_train': scaled_tree_metrics[1], 'RMSE_test': scaled_tree_metrics[2], 
                    'r2_train': scaled_tree_metrics[3], 'r2_test': scaled_tree_metrics[4], 'max_depth':8}

# Random Forest Regressor

In [43]:
rfr = RandomForestRegressor()
params = {'n_estimators': np.arange(1, 10),
         'max_depth': np.arange(1, 10)}
rfr_cv = GridSearchCV(rfr, param_grid=params, cv=5)
rfr_cv.fit(X_train, y_train)
rfr_cv.best_params_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


{'max_depth': 9, 'n_estimators': 8}

In [44]:
rfrreg = RandomForestRegressor(max_depth=9, n_estimators=8)
rfrreg_metrics = get_metrics(rfrreg, X_train, X_test, y_train, y_test)
rfrreg_metrics[4]

  


0.9886187822782851

In [45]:
rfrtrees_parameters = {'RMSE_train': rfrreg_metrics[1], 'RMSE_test': rfrreg_metrics[2], 
                    'r2_train': rfrreg_metrics[3], 'r2_test': rfrreg_metrics[4], 'n_estimators': 5, 'max_depth': 9}

In [46]:
scaled_rfrreg_metrics = get_metrics(rfrreg, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_rfrreg_metrics[4]

0.935463591297065

In [47]:
scaled_rfrtrees_parameters = {'RMSE_train': scaled_rfrreg_metrics[1], 'RMSE_test': scaled_rfrreg_metrics[2], 
                              'r2_train': scaled_rfrreg_metrics[3], 'r2_test': scaled_rfrreg_metrics[4],
                              'n_estimators': 5, 'max_depth': 9}

# Gradient Boosting

In [48]:
gradboost = GradientBoostingRegressor()
params = {'n_estimators': np.arange(1, 10),
         'max_depth': np.arange(1, 10)}
gradboost_cv = GridSearchCV(gradboost, param_grid=params, cv=5)
gradboost_cv.fit(X_train, y_train)
gradboost_cv.best_params_

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


{'max_depth': 9, 'n_estimators': 9}

In [49]:
gradientboost = GradientBoostingRegressor(max_depth=9, n_estimators=9)
gradient_metrics = get_metrics(gradientboost, X_train, X_test, y_train, y_test)
gradient_metrics[4]

  return f(*args, **kwargs)


0.8396452069036671

In [50]:
gradientboosting_parameters = {'RMSE_train': gradient_metrics[1], 'RMSE_test': gradient_metrics[2], 
                               'r2_train': gradient_metrics[3], 'r2_test': gradient_metrics[4],
                              'n_estimators': 9, 'max_depth': 9}

In [51]:
scaled_gradient_metrics = get_metrics(gradientboost, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_gradient_metrics[4]

0.7947290091264229

In [52]:
scaled_gradientboosting_parameters = {'RMSE_train': scaled_gradient_metrics[1], 'RMSE_test': scaled_gradient_metrics[2], 
                               'r2_train': scaled_gradient_metrics[3], 'r2_test': scaled_gradient_metrics[4],
                              'n_estimators': 9, 'max_depth': 9}

# Ada Boost

In [53]:
adaBoost = AdaBoostRegressor()
params = {'n_estimators': np.arange(1, 10)}
adaBoost_cv = GridSearchCV(adaBoost, param_grid=params, cv=5)
adaBoost_cv.fit(X_train, y_train)
adaBoost_cv.best_params_

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


{'n_estimators': 8}

In [54]:
adaboosting = AdaBoostRegressor(n_estimators=8)
adaboosting_metrics = get_metrics(adaboosting, X_train, X_test, y_train, y_test)
adaboosting_metrics[4]

  return f(*args, **kwargs)


0.9055643639304206

In [55]:
adaboosting_parameters = {'RMSE_train': adaboosting_metrics[1], 'RMSE_test': adaboosting_metrics[2], 
                          'r2_train': adaboosting_metrics[3], 'r2_test': adaboosting_metrics[4], 'n_estimators': 6}

In [56]:
scaled_adaboosting_metrics = get_metrics(adaboosting, scaled_X_train, scaled_X_test, scaled_y_train, scaled_y_test)
scaled_adaboosting_metrics[4]

0.896291579102381

In [57]:
scaled_adaboosting_parameters = {'RMSE_train': scaled_adaboosting_metrics[1], 'RMSE_test': scaled_adaboosting_metrics[2], 
                          'r2_train': scaled_adaboosting_metrics[3], 'r2_test': scaled_adaboosting_metrics[4],
                          'n_estimators': 6}

In [58]:
df = pd.DataFrame([linreg_parameters, ridge_parameters, lasso_parameters, k_nn_parameters, tree_parameters,
                  rfrtrees_parameters, gradientboosting_parameters, adaboosting_parameters],
                 index=['Linear Regression', 'Ridge Regression', 'Lasso Regression', 'KNearestNeighbors',
                       'Single Decison Tree', 'Random Forest', 'Gradient Boosting', 'AdaBoosting'])

df_scaled = pd.DataFrame([scaled_linreg_parameters, scaled_ridge_parameters, scaled_lasso_parameters,
                          scaled_k_nn_parameters, scaled_tree_parameters, scaled_rfrtrees_parameters,
                          scaled_gradientboosting_parameters, scaled_adaboosting_parameters],
                 index=['Scaled Linear Regression', 'Scaled Ridge Regression', 'Scaled Lasso Regression',
                        'Scaled KNearestNeighbors', 'Scaled Single Decison Tree', 'Scaled Random Forest',
                        'Scaled Gradient Boosting', 'Scaled AdaBoosting'])

The best performing model is the RandomForest Regressor with max_depth=9 and n_estimators=5.

In [59]:
df_coef = pd.DataFrame([linreg_coef, ridgereg_coef, lassoreg_coef], index=['Linear Regression Coefficients',
                                                                          'Ridge Regression Coefficients',
                                                                          'Lasso Regression Coefficients'])

df_coef_scaled = pd.DataFrame([scaled_linreg_coef, scaled_ridgereg_coef, scaled_lassoreg_coef],
                              index=['Scaled Linear Regression Coefficients', 'Scaled Ridge Regression Coefficients',
                                    'Scaled Lasso Regression Coefficients'])

In [60]:
df

Unnamed: 0,RMSE_train,RMSE_test,r2_train,r2_test,alpha,n_neighbors,max_depth,n_estimators
Linear Regression,0.002018,0.002027,0.99938,0.999373,,,,
Ridge Regression,0.00205,0.002034,0.99937,0.999371,0.1,,,
Lasso Regression,0.007303,0.007118,0.997757,0.997798,0.001,,,
KNearestNeighbors,0.132697,0.141938,0.95924,0.956095,,8.0,,
Single Decison Tree,0.039336,0.076054,0.987917,0.976475,,,8.0,
Random Forest,0.020177,0.036794,0.993802,0.988619,,,9.0,5.0
Gradient Boosting,0.497446,0.518409,0.8472,0.839645,,,9.0,9.0
AdaBoosting,0.282912,0.3053,0.913098,0.905564,,,,6.0


In [61]:
df_scaled

Unnamed: 0,RMSE_train,RMSE_test,r2_train,r2_test,alpha,n_neighbors,max_depth,n_estimators
Scaled Linear Regression,0.02795,0.030253,0.972041,0.97025,,,,
Scaled Ridge Regression,0.027957,0.030253,0.972034,0.97025,0.1,,,
Scaled Lasso Regression,0.039477,0.043769,0.96051,0.956958,0.001,,,
Scaled KNearestNeighbors,0.042982,0.068731,0.957004,0.932412,,8.0,,
Scaled Single Decison Tree,0.027441,0.095023,0.97255,0.906557,,,8.0,
Scaled Random Forest,0.019864,0.065627,0.98013,0.935464,,,9.0,5.0
Scaled Gradient Boosting,0.159822,0.208741,0.840125,0.794729,,,9.0,9.0
Scaled AdaBoosting,0.071983,0.105461,0.927993,0.896292,,,,6.0


In [62]:
df_coef

Unnamed: 0,avg_VendorID_hour_coef,avg_passenger_count_hour_coef,avg_trip_distance_hour_coef,avg_RatecodeID_hour_coef,avg_fare_amount_hour_coef,avg_extra_hour_coef,avg_mta_tax_hour_coef,avg_tip_amount_hour_coef,avg_tolls_amount_hour_coef,avg_improvement_surcharge_hour_coef,...,avg_Brooklyn_DO_isna_hour_coef,avg_Staten_Island_DO_isna_hour_coef,avg_Mins_In_Ride_hour_coef,Year_coef,Month_coef,Day_coef,Hour_coef,Minute_coef,Second_coef,Intercept
Linear Regression Coefficients,2.082033,-0.019954,0.01694,-0.783922,0.981506,0.99201,-54215520000.0,1.022103,0.8595,90359190000.0,...,0.0,0.0,0.003655,-1.3e-05,0.001179,-0.000281,-0.000492,0.0,0.0,0.032297
Ridge Regression Coefficients,2.083571,-0.022123,0.015256,0.727938,0.981518,0.9926,1.649117,1.023287,0.903426,0.9894703,...,0.0,0.0,0.00457,-1.3e-05,0.001045,-0.000279,-0.000434,0.0,0.0,0.02692
Lasso Regression Coefficients,0.515967,0.0,0.038061,-0.195219,1.02317,0.901762,-0.0,1.066358,0.0,-0.0,...,0.0,0.0,0.005832,3.7e-05,-0.009497,-0.000545,-0.001493,0.0,0.0,-0.078297


In [63]:
df_coef_scaled

Unnamed: 0,avg_VendorID_hour_coef,avg_passenger_count_hour_coef,avg_trip_distance_hour_coef,avg_RatecodeID_hour_coef,avg_fare_amount_hour_coef,avg_extra_hour_coef,avg_mta_tax_hour_coef,avg_tip_amount_hour_coef,avg_tolls_amount_hour_coef,avg_improvement_surcharge_hour_coef,...,avg_Brooklyn_DO_isna_hour_coef,avg_Staten_Island_DO_isna_hour_coef,avg_Mins_In_Ride_hour_coef,Year_coef,Month_coef,Day_coef,Hour_coef,Minute_coef,Second_coef,Intercept
Scaled Linear Regression Coefficients,-1.181762,0.027302,-0.022608,-1.690167,-6.162627,-0.985547,-3.723688e-13,-1.092668,-0.015227,-0.010378,...,0.0,0.019074,0.0,-0.041388,0.006276,-0.015624,0.0,0.0,5.294336,4e-06
Scaled Ridge Regression Coefficients,-1.181691,0.02742,-0.022598,-1.643673,-6.161827,-0.985434,0.0,-1.092632,-0.015274,-0.010382,...,0.0,0.019039,0.0,-0.041391,0.006283,-0.015665,0.0,0.0,5.293651,4e-06
Scaled Lasso Regression Coefficients,-0.449726,0.019993,-0.043627,1.236965,-3.330798,-0.502834,0.0,-0.493521,-0.00435,-0.009961,...,0.0,0.037252,0.0,-0.066406,0.0,-0.040194,0.0,0.0,2.79736,4e-06


In [64]:
df.to_csv('./datasets/Model_Metrics.csv')
df_scaled.to_csv('./datasets/Scaled_Model_Metrics')
df_coef.to_csv('./datasets/Coef_Metrics')
df_coef_scaled.to_csv('./datasets/Scaled_Coef_Metrics')