In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Types of models that will be used to test the methods
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
# Scoring metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

In [2]:
training_data = pd.read_csv('./datasets/Train_Data.csv', index_col=0)
testing_data = pd.read_csv('./datasets/Test_Data.csv', index_col=0)
training_data.head()

Unnamed: 0,avg_trip_distance_hour,avg_fare_amount_hour,avg_tolls_amount_hour,avg_tip_amount_hour,avg_Brooklyn_PU_hour,avg_Manhattan_PU_hour,avg_Queens_PU_hour,avg_Staten_Island_PU_hour,avg_Bronx_DO_hour,avg_Brooklyn_DO_hour,...,avg_Queens_DO_hour,avg_Staten_Island_DO_hour,avg_Mins_In_Ride_hour,Year,Month,Day,Hour,Minute,Second,avg_total_amount_hour
2580,2.236724,9.381992,0.0,1.243851,0.028736,0.93295,0.022989,0.0,0.038314,0.024904,...,0.028736,0.0,8.66228,2020,4,17,13,0,0,13.582953
2534,2.240061,9.39793,0.0,1.381967,0.014344,0.94877,0.02459,0.0,0.043033,0.026639,...,0.02459,0.0,8.70209,2020,4,15,15,0,0,13.496905
1653,2.469164,10.333324,0.000353,2.06343,0.010463,0.963948,0.024581,0.0,0.005546,0.059876,...,0.035926,0.0,10.659279,2020,3,9,22,0,0,16.207518
4281,2.099069,9.251151,0.0,1.604006,0.022082,0.955836,0.020505,0.0,0.012618,0.031546,...,0.0347,0.0,8.363486,2020,6,27,10,0,0,13.726811
3057,2.511803,10.05296,0.004345,1.380512,0.037951,0.922201,0.017078,0.0,0.036053,0.039848,...,0.02277,0.0,9.328482,2020,5,7,10,0,0,13.604888


In [3]:
training_data = training_data.fillna(0)
testing_data = testing_data.fillna(0)

In [4]:
testing_data.isnull().sum()

avg_trip_distance_hour       0
avg_fare_amount_hour         0
avg_tolls_amount_hour        0
avg_tip_amount_hour          0
avg_Brooklyn_PU_hour         0
avg_Manhattan_PU_hour        0
avg_Queens_PU_hour           0
avg_Staten_Island_PU_hour    0
avg_Bronx_DO_hour            0
avg_Brooklyn_DO_hour         0
avg_Manhattan_DO_hour        0
avg_Queens_DO_hour           0
avg_Staten_Island_DO_hour    0
avg_Mins_In_Ride_hour        0
Year                         0
Month                        0
Day                          0
Hour                         0
Minute                       0
Second                       0
avg_total_amount_hour        0
dtype: int64

In [5]:
print(training_data.shape)
testing_data.shape

(3056, 21)


(1310, 21)

In [6]:
# splitting the data into training and testing variables
X_train = training_data.drop(['avg_total_amount_hour'], axis=1)
X_test = testing_data.drop(['avg_total_amount_hour'], axis=1)
y_train = training_data[['avg_total_amount_hour']]
y_test = testing_data[['avg_total_amount_hour']]

In [7]:
# This first model will test on the raw data using Linear Regression
linreg = LinearRegression()
linreg.fit(X_train, y_train)
linreg_y_pred = linreg.predict(X_test)

In [8]:
# Returns RMSE
train_linreg_RMSE = mean_squared_error(y_train, linreg.predict(X_train))
r2_linreg_train = r2_score(y_train, linreg.predict(X_train))
linreg_RMSE = mean_squared_error(y_test, linreg_y_pred, squared=False)
linreg_r2 = r2_score(y_test, linreg_y_pred)
linreg_r2

0.6764275253566046

In [9]:
linreg_parameters = {'RMSE_train': train_linreg_RMSE, 'RMSE_test': linreg_RMSE, 
                    'r2_train': r2_linreg_train, 'r2_test': linreg_r2}

In [63]:
linreg_coef = {}
for i in range(len(X_train.columns)):
    key = '{}_coef'.format(X_train.columns[i])
    linreg_coef[key] = linreg.coef_[0][i]
linreg_coef['Intercept'] = linreg.intercept_[0]
linreg_coef

{'avg_trip_distance_hour_coef': -0.32481568172044156,
 'avg_fare_amount_hour_coef': 0.5734315374844084,
 'avg_tolls_amount_hour_coef': -16.99126915779739,
 'avg_tip_amount_hour_coef': 0.8409761325502569,
 'avg_Brooklyn_PU_hour_coef': 5.983377870521164,
 'avg_Manhattan_PU_hour_coef': 8.247510944910449,
 'avg_Queens_PU_hour_coef': -2.0511599637526627,
 'avg_Staten_Island_PU_hour_coef': 4.428118228019648,
 'avg_Bronx_DO_hour_coef': -11.973762000817052,
 'avg_Brooklyn_DO_hour_coef': -7.849479795235226,
 'avg_Manhattan_DO_hour_coef': -18.524112006808252,
 'avg_Queens_DO_hour_coef': -8.11550181309112,
 'avg_Staten_Island_DO_hour_coef': 23.076246943651057,
 'avg_Mins_In_Ride_hour_coef': 0.16311470860086394,
 'Year_coef': -1.1934897514720433e-15,
 'Month_coef': 0.026108646950705996,
 'Day_coef': 0.00332080498531079,
 'Hour_coef': 0.031767581221394114,
 'Minute_coef': 0.0,
 'Second_coef': 0.0,
 'Intercept': 16.21422232637647}

# Ridge Regression

In [10]:
ridge = Ridge()
params = {'alpha': [1, 0.1, 0.01, 0.001, 0.0001, 0.00001]}
ridge_cv = GridSearchCV(ridge, param_grid=params, cv=5)
ridge_cv.fit(X_train, y_train)
ridge_cv.best_params_

{'alpha': 0.1}

In [11]:
ridgereg = Ridge(0.1)
ridgereg.fit(X_train, y_train)
ridgereg_y_pred = ridgereg.predict(X_test)

In [12]:
train_ridgereg_RMSE = mean_squared_error(y_train, ridgereg.predict(X_train))
r2_ridgereg_train = r2_score(y_train, ridgereg.predict(X_train))
ridgereg_RMSE = mean_squared_error(y_test, ridgereg_y_pred, squared=False)
ridgereg_r2 = r2_score(y_test, ridgereg_y_pred)
ridgereg_r2

0.6775786470340428

In [13]:
ridge_parameters = {'RMSE_train': train_ridgereg_RMSE, 'RMSE_test': ridgereg_RMSE, 
                    'r2_train': r2_ridgereg_train, 'r2_test': ridgereg_r2, 'alpha': 0.1}

In [55]:
ridgereg.coef_

array([[-3.91571519e-01,  5.61920907e-01, -9.42379108e+00,
         8.63518772e-01,  1.22301137e+00,  4.22058186e+00,
        -6.20625045e+00,  5.89528768e-01, -6.62885717e+00,
        -1.31695728e+00, -1.22728881e+01, -9.25779458e-01,
         5.88346551e-01,  1.64535390e-01,  0.00000000e+00,
         2.68638617e-02,  3.70647576e-03,  3.12439747e-02,
         0.00000000e+00,  0.00000000e+00]])

In [64]:
ridgereg_coef = {}
for i in range(len(X_train.columns)):
    key = '{}_coef'.format(X_train.columns[i])
    ridgereg_coef[key] = ridgereg.coef_[0][i]
ridgereg_coef['Intercept'] = ridgereg.intercept_[0]
ridgereg_coef

{'avg_trip_distance_hour_coef': -0.3915715186256619,
 'avg_fare_amount_hour_coef': 0.5619209071064462,
 'avg_tolls_amount_hour_coef': -9.423791084454756,
 'avg_tip_amount_hour_coef': 0.8635187721269665,
 'avg_Brooklyn_PU_hour_coef': 1.2230113662638304,
 'avg_Manhattan_PU_hour_coef': 4.220581858640246,
 'avg_Queens_PU_hour_coef': -6.206250447927718,
 'avg_Staten_Island_PU_hour_coef': 0.5895287682132268,
 'avg_Bronx_DO_hour_coef': -6.6288571704157295,
 'avg_Brooklyn_DO_hour_coef': -1.3169572813088062,
 'avg_Manhattan_DO_hour_coef': -12.272888086966498,
 'avg_Queens_DO_hour_coef': -0.9257794582000122,
 'avg_Staten_Island_DO_hour_coef': 0.5883465509992413,
 'avg_Mins_In_Ride_hour_coef': 0.1645353897990602,
 'Year_coef': 0.0,
 'Month_coef': 0.026863861746796405,
 'Day_coef': 0.0037064757641077457,
 'Hour_coef': 0.031243974743425277,
 'Minute_coef': 0.0,
 'Second_coef': 0.0,
 'Intercept': 14.153386640402397}

# Lasso Regression

In [14]:
lasso = Lasso()
params = {'alpha': [1, 0.1, 0.01, 0.001, 0.0001, 0.00001]}
lasso_cv = GridSearchCV(lasso, param_grid=params, cv=5)
lasso_cv.fit(X_train, y_train)
lasso_cv.best_params_

  positive)


{'alpha': 0.001}

In [15]:
lassoreg = Lasso(alpha=0.001)
lassoreg.fit(X_train, y_train)
lassoreg_y_pred = lassoreg.predict(X_test)

In [16]:
train_lassoreg_RMSE = mean_squared_error(y_train, lassoreg.predict(X_train))
r2_lassoreg_train = mean_squared_error(y_train, lassoreg.predict(X_train))
lassoreg_RMSE = mean_squared_error(y_test, lassoreg_y_pred, squared=False)
lassoreg_r2 = r2_score(y_test, lassoreg_y_pred)
lassoreg_r2

0.6805918317425883

In [17]:
lasso_parameters = {'RMSE_train': train_lassoreg_RMSE, 'RMSE_test': lassoreg_RMSE, 
                    'r2_train': r2_lassoreg_train, 'r2_test': lassoreg_r2, 'alpha': 0.001}

In [65]:
lassoreg_coef = {}
for i in range(len(X_train.columns)):
    key = '{}_coef'.format(X_train.columns[i])
    lassoreg_coef[key] = lassoreg.coef_[i]
lassoreg_coef['Intercept'] = lassoreg.intercept_[0]
lassoreg_coef

{'avg_trip_distance_hour_coef': -0.36846443289411296,
 'avg_fare_amount_hour_coef': 0.5264839419073939,
 'avg_tolls_amount_hour_coef': -0.0,
 'avg_tip_amount_hour_coef': 0.9890920456698394,
 'avg_Brooklyn_PU_hour_coef': 0.0,
 'avg_Manhattan_PU_hour_coef': 3.2189051850543917,
 'avg_Queens_PU_hour_coef': -3.8830536275128185,
 'avg_Staten_Island_PU_hour_coef': 0.0,
 'avg_Bronx_DO_hour_coef': -2.605974914423464,
 'avg_Brooklyn_DO_hour_coef': 0.0,
 'avg_Manhattan_DO_hour_coef': -9.886781767105843,
 'avg_Queens_DO_hour_coef': 0.0,
 'avg_Staten_Island_DO_hour_coef': 0.0,
 'avg_Mins_In_Ride_hour_coef': 0.1705450530554092,
 'Year_coef': 0.0,
 'Month_coef': 0.022676981940053294,
 'Day_coef': 0.003831181781465323,
 'Hour_coef': 0.028368544245179372,
 'Minute_coef': 0.0,
 'Second_coef': 0.0,
 'Intercept': 12.813904156563158}

# KNearest Neighbors

In [18]:
knn = KNeighborsRegressor()
params = {'n_neighbors': np.arange(1, 10)}
knn_cv = GridSearchCV(knn, param_grid=params, cv=5)
knn_cv.fit(X_train, y_train)
knn_cv.best_params_

{'n_neighbors': 8}

In [19]:
k_nn = KNeighborsRegressor(n_neighbors=8)
k_nn.fit(X_train, y_train)
k_nn_y_pred = k_nn.predict(X_test)

In [20]:
train_k_nn_RMSE = mean_squared_error(y_train, k_nn.predict(X_train))
r2_k_nn_train = r2_score(y_train, k_nn.predict(X_train))
k_nn_RMSE = mean_squared_error(y_test, k_nn_y_pred, squared=False)
k_nn_r2 = r2_score(y_test, k_nn_y_pred)
k_nn_r2

0.7531360334651717

In [21]:
k_nn_parameters = {'RMSE_train': train_k_nn_RMSE, 'RMSE_test': k_nn_RMSE, 
                    'r2_train': r2_k_nn_train, 'r2_test': k_nn_r2, 'n_neighbors': 8}

# Decision Tree

In [22]:
decisiontree = DecisionTreeRegressor()
params = {'max_depth': np.arange(1, 10)}
decisiontree = GridSearchCV(decisiontree, param_grid=params, cv=5)
decisiontree.fit(X_train, y_train)
decisiontree.best_params_

{'max_depth': 7}

In [23]:
tree = DecisionTreeRegressor(max_depth=7)
tree.fit(X_train, y_train)
tree_y_pred = tree.predict(X_test)

In [24]:
train_tree_RMSE = mean_squared_error(y_train, tree.predict(X_train))
r2_tree_train = r2_score(y_train, tree.predict(X_train))
tree_RMSE = mean_squared_error(y_test, tree_y_pred, squared=False)
tree_r2 = r2_score(y_test, tree_y_pred)
tree_RMSE

0.8242080764401857

In [25]:
tree_parameters = {'RMSE_train': train_tree_RMSE, 'RMSE_test': tree_RMSE, 
                    'r2_train': r2_tree_train, 'r2_test': tree_r2, 'max_depth':7}

# Random Forest Regressor

In [26]:
rfr = RandomForestRegressor()
params = {'n_estimators': np.arange(1, 10),
         'max_depth': np.arange(1, 10)}
rfr_cv = GridSearchCV(rfr, param_grid=params, cv=5)
rfr_cv.fit(X_train, y_train)
rfr_cv.best_params_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


{'max_depth': 9, 'n_estimators': 9}

In [27]:
rfrtrees = RandomForestRegressor(n_estimators=5, max_depth=9)
rfrtrees.fit(X_train, y_train)
rfrtrees_y_pred = rfrtrees.predict(X_test)

  


In [28]:
train_rfrtrees_RMSE = mean_squared_error(y_train, rfrtrees.predict(X_train))
r2_rfrtrees_train = r2_score(y_train, rfrtrees.predict(X_train))
rfrtrees_RMSE = mean_squared_error(y_test, rfrtrees_y_pred, squared=False)
rfrtrees_r2 = r2_score(y_test, rfrtrees_y_pred)
rfrtrees_r2

0.8403740243991956

In [29]:
rfrtrees_parameters = {'RMSE_train': train_rfrtrees_RMSE, 'RMSE_test': rfrtrees_RMSE, 
                    'r2_train': r2_rfrtrees_train, 'r2_test': rfrtrees_r2, 'n_estimators': 5, 'max_depth': 9}

# Gradient Boosting

In [30]:
gradboost = GradientBoostingRegressor()
params = {'n_estimators': np.arange(1, 10),
         'max_depth': np.arange(1, 10)}
gradboost_cv = GridSearchCV(gradboost, param_grid=params, cv=5)
gradboost_cv.fit(X_train, y_train)
gradboost_cv.best_params_

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


{'max_depth': 9, 'n_estimators': 9}

In [31]:
gradientboosting = GradientBoostingRegressor(n_estimators=9, max_depth=9)
gradientboosting.fit(X_train, y_train)
gradientboosting_y_predict = gradientboosting.predict(X_test)

  return f(*args, **kwargs)


In [32]:
train_gradientboosting_RMSE = mean_squared_error(y_train, gradientboosting.predict(X_train))
r2_gradientboosting_train = r2_score(y_train, gradientboosting.predict(X_train))
gradientboosting_RMSE = mean_squared_error(y_test, gradientboosting_y_predict, squared=False)
gradientboosting_r2 = r2_score(y_test, gradientboosting_y_predict)
gradientboosting_r2

0.7337951948963597

In [33]:
gradientboosting_parameters = {'RMSE_train': train_gradientboosting_RMSE, 'RMSE_test': gradientboosting_RMSE, 
                               'r2_train': r2_gradientboosting_train, 'r2_test': gradientboosting_r2,
                              'n_estimators': 9, 'max_depth': 9}

# Ada Boost

In [34]:
adaBoost = AdaBoostRegressor()
params = {'n_estimators': np.arange(1, 10)}
adaBoost_cv = GridSearchCV(adaBoost, param_grid=params, cv=5)
adaBoost_cv.fit(X_train, y_train)
adaBoost_cv.best_params_

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


{'n_estimators': 9}

In [35]:
adaBoosting = AdaBoostRegressor(n_estimators=6)
adaBoosting.fit(X_train, y_train)
adaBoosting_y_pred = adaBoosting.predict(X_test)

  return f(*args, **kwargs)


In [36]:
train_adaBoosting_RMSE = mean_squared_error(y_train, adaBoosting.predict(X_train))
r2_adaBoosting_train = r2_score(y_train, adaBoosting.predict(X_train))
adaBoosting_RMSE = mean_squared_error(y_test, adaBoosting_y_pred)
adaBoosting_r2 = r2_score(y_test, adaBoosting_y_pred)
adaBoosting_r2

0.7376273590730902

In [37]:
adaboosting_parameters = {'RMSE_train': train_adaBoosting_RMSE, 'RMSE_test': adaBoosting_RMSE, 
                          'r2_train': r2_adaBoosting_train, 'r2_test': adaBoosting_r2, 'n_estimators': 6}

In [39]:
df = pd.DataFrame([linreg_parameters, ridge_parameters, lasso_parameters, k_nn_parameters, tree_parameters,
                  rfrtrees_parameters, gradientboosting_parameters, adaboosting_parameters],
                 index=['Linear Regression', 'Ridge Regression', 'Lasso Regression', 'KNearestNeighbors',
                       'Single Decison Tree', 'Random Forest', 'Gradient Boosting', 'AdaBoosting'])
df

Unnamed: 0,RMSE_train,RMSE_test,r2_train,r2_test,alpha,n_neighbors,max_depth,n_estimators
Linear Regression,1.268635,1.040149,0.604423,0.676428,,,,
Ridge Regression,1.275546,1.038297,0.602268,0.677579,0.1,,,
Lasso Regression,1.296831,1.033434,1.296831,0.680592,0.001,,,
KNearestNeighbors,0.749192,0.908528,0.766392,0.753136,,8.0,,
Single Decison Tree,0.392672,0.832889,0.87756,0.79253,,,7.0,
Random Forest,0.246644,0.704695,0.923093,0.85148,,,9.0,5.0
Gradient Boosting,0.616607,0.942479,0.807734,0.734341,,,9.0,9.0
AdaBoosting,0.785707,0.829604,0.755006,0.751886,,,,6.0


The best performing model is the RandomForest Regressor with max_depth=9 and n_estimators=5.

In [66]:
df_coef = pd.DataFrame([linreg_coef, ridgereg_coef, lassoreg_coef], index=['Linear Regression Coefficients',
                                                                          'Ridge Regression Coefficients',
                                                                          'Lasso Regression Coefficients'])
df_coef

Unnamed: 0,avg_trip_distance_hour_coef,avg_fare_amount_hour_coef,avg_tolls_amount_hour_coef,avg_tip_amount_hour_coef,avg_Brooklyn_PU_hour_coef,avg_Manhattan_PU_hour_coef,avg_Queens_PU_hour_coef,avg_Staten_Island_PU_hour_coef,avg_Bronx_DO_hour_coef,avg_Brooklyn_DO_hour_coef,...,avg_Queens_DO_hour_coef,avg_Staten_Island_DO_hour_coef,avg_Mins_In_Ride_hour_coef,Year_coef,Month_coef,Day_coef,Hour_coef,Minute_coef,Second_coef,Intercept
Linear Regression Coefficients,-0.324816,0.573432,-16.991269,0.840976,5.983378,8.247511,-2.05116,4.428118,-11.973762,-7.84948,...,-8.115502,23.076247,0.163115,-1.19349e-15,0.026109,0.003321,0.031768,0.0,0.0,16.214222
Ridge Regression Coefficients,-0.391572,0.561921,-9.423791,0.863519,1.223011,4.220582,-6.20625,0.589529,-6.628857,-1.316957,...,-0.925779,0.588347,0.164535,0.0,0.026864,0.003706,0.031244,0.0,0.0,14.153387
Lasso Regression Coefficients,-0.368464,0.526484,-0.0,0.989092,0.0,3.218905,-3.883054,0.0,-2.605975,0.0,...,0.0,0.0,0.170545,0.0,0.022677,0.003831,0.028369,0.0,0.0,12.813904


In [67]:
df_coef.to_csv('Coef_Model')

In [40]:
df.to_csv('Model_Metrics.csv')