In [44]:
import pandas as pd
import numpy as np
import sklearn

import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from scipy import stats

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
## Loading the training set

X = pd.read_csv("X.csv")
X.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
0,-121.46,38.52,29.0,3873.0,797.0,2237.0,706.0,2.1736,INLAND
1,-117.23,33.09,7.0,5320.0,855.0,2015.0,768.0,6.3373,NEAR OCEAN
2,-119.04,35.37,44.0,1618.0,310.0,667.0,300.0,2.875,INLAND
3,-117.13,32.75,24.0,1877.0,519.0,898.0,483.0,2.2264,NEAR OCEAN
4,-118.7,34.28,27.0,3536.0,646.0,1837.0,580.0,4.4964,<1H OCEAN


# # Grid Search:

Let's find the best hyperparams values for our models.

In [4]:
## Loading the score reports of the models

import json

report = json.load(open("scores_report.json", "r"))
report

{'mod1 (Linear Regression)': {'r2': 0.648079515380494,
  'rmse': 68635.27064635929},
 'mod2 (DescisionTreeRegressor)': {'r2': 1.0, 'rmse': 0.0},
 'mod2.1 (dt cv)': {'rmse': 69620.91130100285},
 'mod1.1 (lr cv)': {'rmse': 69027.45964364048},
 'mod3 (RandomForestRegressor)': {'rmse': 18490.47999527855},
 'mod3.1 (rf cv)': {'rmse': 49720.022020119024}}

In [5]:
## load the prepared features and label

X_prepared = np.loadtxt("X_prepared.txt")
Y = pd.read_csv("Y.csv")

In [6]:
## Let's go ahead w our random forest model

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

In [9]:
## Parameters to be hypertuned

param_grid = [
    { "n_estimators": [3, 10, 30], 'max_features': [2, 4, 6, 8], 
     "ccp_alpha": np.arange(0, 0.5, 0.02)},
#     { "n_estimators": [30, 60, 120], 'max_features': [8, 12, 16]},
    
    { "bootstrap": [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4], 
     "ccp_alpha": np.arange(0, 0.5, 0.02)},
#     { "bootstrap": [False], 'n_estimators': [30, 60, 120], 'max_features': [4, 8, 12, 16]},
    
]

#### Baseline model:

In [10]:
## baseline model

rf_mod = RandomForestRegressor()
rf_mod

In [11]:
## baseline model's mse

basemod_score = report["mod3.1 (rf cv)"]
basemod_score

{'rmse': 49720.022020119024}

In [12]:
## Executing the GridSearchCV

grid_search = GridSearchCV(rf_mod, param_grid, cv=5, scoring='neg_mean_squared_error', 
                       return_train_score=True, verbose=3)
grid_search.fit(X_prepared, Y)

Fitting 5 folds for each of 450 candidates, totalling 2250 fits
[CV 1/5] END ccp_alpha=0.0, max_features=2, n_estimators=3;, score=(train=-1154944402.550, test=-4501615749.707) total time=   0.0s
[CV 2/5] END ccp_alpha=0.0, max_features=2, n_estimators=3;, score=(train=-1193115762.909, test=-4459631939.507) total time=   0.0s
[CV 3/5] END ccp_alpha=0.0, max_features=2, n_estimators=3;, score=(train=-1127072881.416, test=-4299876564.332) total time=   0.0s
[CV 4/5] END ccp_alpha=0.0, max_features=2, n_estimators=3;, score=(train=-1265103358.753, test=-4797093215.048) total time=   0.0s
[CV 5/5] END ccp_alpha=0.0, max_features=2, n_estimators=3;, score=(train=-1223858216.108, test=-4437893600.740) total time=   0.0s
[CV 1/5] END ccp_alpha=0.0, max_features=2, n_estimators=10;, score=(train=-645495022.652, test=-3333520063.131) total time=   0.4s
[CV 2/5] END ccp_alpha=0.0, max_features=2, n_estimators=10;, score=(train=-600433261.746, test=-3280203430.099) total time=   0.3s
[CV 3/5] END

[CV 4/5] END ccp_alpha=0.02, max_features=2, n_estimators=3;, score=(train=-1183125415.364, test=-4575699922.756) total time=   0.0s
[CV 5/5] END ccp_alpha=0.02, max_features=2, n_estimators=3;, score=(train=-1150146694.694, test=-4392774733.688) total time=   0.0s
[CV 1/5] END ccp_alpha=0.02, max_features=2, n_estimators=10;, score=(train=-642753534.957, test=-3390088257.568) total time=   0.4s
[CV 2/5] END ccp_alpha=0.02, max_features=2, n_estimators=10;, score=(train=-645432478.053, test=-3208209227.899) total time=   0.4s
[CV 3/5] END ccp_alpha=0.02, max_features=2, n_estimators=10;, score=(train=-655482062.609, test=-3388998771.571) total time=   0.4s
[CV 4/5] END ccp_alpha=0.02, max_features=2, n_estimators=10;, score=(train=-620749419.775, test=-3342301903.534) total time=   0.4s
[CV 5/5] END ccp_alpha=0.02, max_features=2, n_estimators=10;, score=(train=-625826309.975, test=-3370190670.814) total time=   0.5s
[CV 1/5] END ccp_alpha=0.02, max_features=2, n_estimators=30;, score=

[CV 1/5] END ccp_alpha=0.04, max_features=2, n_estimators=10;, score=(train=-625724901.807, test=-3381453931.951) total time=   0.3s
[CV 2/5] END ccp_alpha=0.04, max_features=2, n_estimators=10;, score=(train=-645738459.152, test=-3241539897.922) total time=   0.3s
[CV 3/5] END ccp_alpha=0.04, max_features=2, n_estimators=10;, score=(train=-633264728.523, test=-3224044587.974) total time=   0.4s
[CV 4/5] END ccp_alpha=0.04, max_features=2, n_estimators=10;, score=(train=-636493249.112, test=-3522635081.910) total time=   0.3s
[CV 5/5] END ccp_alpha=0.04, max_features=2, n_estimators=10;, score=(train=-635311945.572, test=-3435900658.398) total time=   0.3s
[CV 1/5] END ccp_alpha=0.04, max_features=2, n_estimators=30;, score=(train=-477608129.595, test=-3069449341.474) total time=   1.2s
[CV 2/5] END ccp_alpha=0.04, max_features=2, n_estimators=30;, score=(train=-471342484.118, test=-2992896190.832) total time=   1.3s
[CV 3/5] END ccp_alpha=0.04, max_features=2, n_estimators=30;, score=

[CV 3/5] END ccp_alpha=0.06, max_features=2, n_estimators=10;, score=(train=-618442967.065, test=-3364531827.732) total time=   0.3s
[CV 4/5] END ccp_alpha=0.06, max_features=2, n_estimators=10;, score=(train=-630127846.297, test=-3467386409.577) total time=   0.4s
[CV 5/5] END ccp_alpha=0.06, max_features=2, n_estimators=10;, score=(train=-633051496.070, test=-3368449641.536) total time=   0.3s
[CV 1/5] END ccp_alpha=0.06, max_features=2, n_estimators=30;, score=(train=-467028924.220, test=-3065216317.497) total time=   1.2s
[CV 2/5] END ccp_alpha=0.06, max_features=2, n_estimators=30;, score=(train=-468389120.918, test=-2841509979.155) total time=   1.2s
[CV 3/5] END ccp_alpha=0.06, max_features=2, n_estimators=30;, score=(train=-476814003.659, test=-2999978803.402) total time=   1.3s
[CV 4/5] END ccp_alpha=0.06, max_features=2, n_estimators=30;, score=(train=-460972952.479, test=-3144325972.727) total time=   1.2s
[CV 5/5] END ccp_alpha=0.06, max_features=2, n_estimators=30;, score=

[CV 5/5] END ccp_alpha=0.08, max_features=2, n_estimators=10;, score=(train=-649861953.013, test=-3398950686.346) total time=   0.3s
[CV 1/5] END ccp_alpha=0.08, max_features=2, n_estimators=30;, score=(train=-471334419.188, test=-3046357553.825) total time=   1.2s
[CV 2/5] END ccp_alpha=0.08, max_features=2, n_estimators=30;, score=(train=-488433374.610, test=-2965346879.353) total time=   1.3s
[CV 3/5] END ccp_alpha=0.08, max_features=2, n_estimators=30;, score=(train=-491219943.322, test=-3036607888.779) total time=   1.1s
[CV 4/5] END ccp_alpha=0.08, max_features=2, n_estimators=30;, score=(train=-476747883.565, test=-3172999729.982) total time=   1.2s
[CV 5/5] END ccp_alpha=0.08, max_features=2, n_estimators=30;, score=(train=-471847813.830, test=-3084747358.164) total time=   1.2s
[CV 1/5] END ccp_alpha=0.08, max_features=4, n_estimators=3;, score=(train=-1107721080.791, test=-3970797165.651) total time=   0.1s
[CV 2/5] END ccp_alpha=0.08, max_features=4, n_estimators=3;, score=(

[CV 2/5] END ccp_alpha=0.1, max_features=2, n_estimators=30;, score=(train=-496519848.721, test=-2973332056.493) total time=   1.1s
[CV 3/5] END ccp_alpha=0.1, max_features=2, n_estimators=30;, score=(train=-491929227.927, test=-3041352036.715) total time=   1.2s
[CV 4/5] END ccp_alpha=0.1, max_features=2, n_estimators=30;, score=(train=-474630531.760, test=-3107533563.054) total time=   1.1s
[CV 5/5] END ccp_alpha=0.1, max_features=2, n_estimators=30;, score=(train=-464140615.593, test=-3055852653.006) total time=   1.2s
[CV 1/5] END ccp_alpha=0.1, max_features=4, n_estimators=3;, score=(train=-992374071.968, test=-3892044021.843) total time=   0.1s
[CV 2/5] END ccp_alpha=0.1, max_features=4, n_estimators=3;, score=(train=-1005292106.692, test=-3971355347.260) total time=   0.1s
[CV 3/5] END ccp_alpha=0.1, max_features=4, n_estimators=3;, score=(train=-1074818158.184, test=-4048716390.439) total time=   0.1s
[CV 4/5] END ccp_alpha=0.1, max_features=4, n_estimators=3;, score=(train=-10

[CV 5/5] END ccp_alpha=0.12, max_features=2, n_estimators=30;, score=(train=-462327867.525, test=-3046771760.994) total time=   1.1s
[CV 1/5] END ccp_alpha=0.12, max_features=4, n_estimators=3;, score=(train=-1077683427.789, test=-4015657175.656) total time=   0.1s
[CV 2/5] END ccp_alpha=0.12, max_features=4, n_estimators=3;, score=(train=-1068627356.964, test=-3952683981.557) total time=   0.1s
[CV 3/5] END ccp_alpha=0.12, max_features=4, n_estimators=3;, score=(train=-985020723.823, test=-3813733477.038) total time=   0.1s
[CV 4/5] END ccp_alpha=0.12, max_features=4, n_estimators=3;, score=(train=-1075115501.366, test=-3955724752.204) total time=   0.1s
[CV 5/5] END ccp_alpha=0.12, max_features=4, n_estimators=3;, score=(train=-1028781810.618, test=-4191311572.256) total time=   0.1s
[CV 1/5] END ccp_alpha=0.12, max_features=4, n_estimators=10;, score=(train=-570938456.349, test=-3077065500.533) total time=   0.5s
[CV 2/5] END ccp_alpha=0.12, max_features=4, n_estimators=10;, score=(

[CV 2/5] END ccp_alpha=0.14, max_features=4, n_estimators=3;, score=(train=-1012506526.804, test=-3751203510.562) total time=   0.2s
[CV 3/5] END ccp_alpha=0.14, max_features=4, n_estimators=3;, score=(train=-1086333856.095, test=-3881058565.878) total time=   0.1s
[CV 4/5] END ccp_alpha=0.14, max_features=4, n_estimators=3;, score=(train=-996208889.025, test=-3915323943.726) total time=   0.1s
[CV 5/5] END ccp_alpha=0.14, max_features=4, n_estimators=3;, score=(train=-1124503140.539, test=-4155503799.801) total time=   0.1s
[CV 1/5] END ccp_alpha=0.14, max_features=4, n_estimators=10;, score=(train=-555165693.991, test=-3123759662.529) total time=   0.7s
[CV 2/5] END ccp_alpha=0.14, max_features=4, n_estimators=10;, score=(train=-569435015.796, test=-2886443732.166) total time=   0.9s
[CV 3/5] END ccp_alpha=0.14, max_features=4, n_estimators=10;, score=(train=-561071959.278, test=-2925280347.604) total time=   0.9s
[CV 4/5] END ccp_alpha=0.14, max_features=4, n_estimators=10;, score=(

[CV 4/5] END ccp_alpha=0.16, max_features=4, n_estimators=3;, score=(train=-1070583064.866, test=-4220930899.493) total time=   0.1s
[CV 5/5] END ccp_alpha=0.16, max_features=4, n_estimators=3;, score=(train=-1027137574.649, test=-3979220494.757) total time=   0.1s
[CV 1/5] END ccp_alpha=0.16, max_features=4, n_estimators=10;, score=(train=-565847779.921, test=-3006964345.140) total time=   0.5s
[CV 2/5] END ccp_alpha=0.16, max_features=4, n_estimators=10;, score=(train=-603619022.959, test=-3057956839.900) total time=   0.6s
[CV 3/5] END ccp_alpha=0.16, max_features=4, n_estimators=10;, score=(train=-570075129.069, test=-2872531529.946) total time=   0.5s
[CV 4/5] END ccp_alpha=0.16, max_features=4, n_estimators=10;, score=(train=-558809211.185, test=-3020420598.441) total time=   0.5s
[CV 5/5] END ccp_alpha=0.16, max_features=4, n_estimators=10;, score=(train=-570998842.589, test=-3123798447.326) total time=   0.6s
[CV 1/5] END ccp_alpha=0.16, max_features=4, n_estimators=30;, score=

[CV 1/5] END ccp_alpha=0.18, max_features=4, n_estimators=10;, score=(train=-577783360.622, test=-3028602255.141) total time=   0.5s
[CV 2/5] END ccp_alpha=0.18, max_features=4, n_estimators=10;, score=(train=-556205775.651, test=-2868392215.829) total time=   0.6s
[CV 3/5] END ccp_alpha=0.18, max_features=4, n_estimators=10;, score=(train=-576104415.523, test=-3002121813.900) total time=   0.5s
[CV 4/5] END ccp_alpha=0.18, max_features=4, n_estimators=10;, score=(train=-561563119.324, test=-3044877892.033) total time=   0.5s
[CV 5/5] END ccp_alpha=0.18, max_features=4, n_estimators=10;, score=(train=-556583767.093, test=-3039169219.797) total time=   0.5s
[CV 1/5] END ccp_alpha=0.18, max_features=4, n_estimators=30;, score=(train=-433739000.226, test=-2771822439.632) total time=   1.8s
[CV 2/5] END ccp_alpha=0.18, max_features=4, n_estimators=30;, score=(train=-439207820.819, test=-2694002738.087) total time=   1.8s
[CV 3/5] END ccp_alpha=0.18, max_features=4, n_estimators=30;, score=

[CV 3/5] END ccp_alpha=0.2, max_features=4, n_estimators=10;, score=(train=-570685485.804, test=-2908319044.766) total time=   0.6s
[CV 4/5] END ccp_alpha=0.2, max_features=4, n_estimators=10;, score=(train=-561286607.485, test=-3172324700.282) total time=   0.5s
[CV 5/5] END ccp_alpha=0.2, max_features=4, n_estimators=10;, score=(train=-568486805.412, test=-3198240203.217) total time=   0.6s
[CV 1/5] END ccp_alpha=0.2, max_features=4, n_estimators=30;, score=(train=-432167465.866, test=-2767098952.579) total time=   1.8s
[CV 2/5] END ccp_alpha=0.2, max_features=4, n_estimators=30;, score=(train=-428428886.230, test=-2669457786.081) total time=   1.8s
[CV 3/5] END ccp_alpha=0.2, max_features=4, n_estimators=30;, score=(train=-444668407.127, test=-2689653671.923) total time=   1.9s
[CV 4/5] END ccp_alpha=0.2, max_features=4, n_estimators=30;, score=(train=-421093315.136, test=-2882101463.625) total time=   2.0s
[CV 5/5] END ccp_alpha=0.2, max_features=4, n_estimators=30;, score=(train=-

[CV 5/5] END ccp_alpha=0.22, max_features=4, n_estimators=10;, score=(train=-582935366.464, test=-3175631799.403) total time=   0.5s
[CV 1/5] END ccp_alpha=0.22, max_features=4, n_estimators=30;, score=(train=-420328478.521, test=-2711511430.796) total time=   1.8s
[CV 2/5] END ccp_alpha=0.22, max_features=4, n_estimators=30;, score=(train=-424494304.623, test=-2610013920.730) total time=   1.8s
[CV 3/5] END ccp_alpha=0.22, max_features=4, n_estimators=30;, score=(train=-433709713.703, test=-2706819000.802) total time=   1.9s
[CV 4/5] END ccp_alpha=0.22, max_features=4, n_estimators=30;, score=(train=-429794066.136, test=-2874494863.243) total time=   2.0s
[CV 5/5] END ccp_alpha=0.22, max_features=4, n_estimators=30;, score=(train=-420009201.761, test=-2798380013.033) total time=   1.8s
[CV 1/5] END ccp_alpha=0.22, max_features=6, n_estimators=3;, score=(train=-929710599.409, test=-3561779230.829) total time=   0.2s
[CV 2/5] END ccp_alpha=0.22, max_features=6, n_estimators=3;, score=(t

[CV 2/5] END ccp_alpha=0.24, max_features=4, n_estimators=30;, score=(train=-438498652.431, test=-2673350728.533) total time=   1.8s
[CV 3/5] END ccp_alpha=0.24, max_features=4, n_estimators=30;, score=(train=-429996879.656, test=-2684385671.730) total time=   1.9s
[CV 4/5] END ccp_alpha=0.24, max_features=4, n_estimators=30;, score=(train=-425886222.182, test=-2864364374.361) total time=   1.8s
[CV 5/5] END ccp_alpha=0.24, max_features=4, n_estimators=30;, score=(train=-420120068.384, test=-2792714688.128) total time=   1.7s
[CV 1/5] END ccp_alpha=0.24, max_features=6, n_estimators=3;, score=(train=-986004542.775, test=-3771106562.901) total time=   0.1s
[CV 2/5] END ccp_alpha=0.24, max_features=6, n_estimators=3;, score=(train=-1036883469.826, test=-3638211266.639) total time=   0.2s
[CV 3/5] END ccp_alpha=0.24, max_features=6, n_estimators=3;, score=(train=-1034800496.321, test=-3975249726.554) total time=   0.2s
[CV 4/5] END ccp_alpha=0.24, max_features=6, n_estimators=3;, score=(t

[CV 4/5] END ccp_alpha=0.26, max_features=4, n_estimators=30;, score=(train=-429402059.140, test=-2851572858.247) total time=   1.8s
[CV 5/5] END ccp_alpha=0.26, max_features=4, n_estimators=30;, score=(train=-424342576.515, test=-2806031116.394) total time=   1.8s
[CV 1/5] END ccp_alpha=0.26, max_features=6, n_estimators=3;, score=(train=-918850866.503, test=-3537954598.023) total time=   0.1s
[CV 2/5] END ccp_alpha=0.26, max_features=6, n_estimators=3;, score=(train=-983315868.233, test=-3744538408.679) total time=   0.2s
[CV 3/5] END ccp_alpha=0.26, max_features=6, n_estimators=3;, score=(train=-986140290.573, test=-3687441005.580) total time=   0.2s
[CV 4/5] END ccp_alpha=0.26, max_features=6, n_estimators=3;, score=(train=-1023699241.189, test=-4043539272.489) total time=   0.2s
[CV 5/5] END ccp_alpha=0.26, max_features=6, n_estimators=3;, score=(train=-968531201.172, test=-3676357512.647) total time=   0.2s
[CV 1/5] END ccp_alpha=0.26, max_features=6, n_estimators=10;, score=(tra

[CV 1/5] END ccp_alpha=0.28, max_features=6, n_estimators=3;, score=(train=-931237892.920, test=-3648113711.411) total time=   0.2s
[CV 2/5] END ccp_alpha=0.28, max_features=6, n_estimators=3;, score=(train=-993598265.089, test=-3658204242.476) total time=   0.2s
[CV 3/5] END ccp_alpha=0.28, max_features=6, n_estimators=3;, score=(train=-956659931.696, test=-3580363860.810) total time=   0.2s
[CV 4/5] END ccp_alpha=0.28, max_features=6, n_estimators=3;, score=(train=-944961182.857, test=-3792545005.149) total time=   0.1s
[CV 5/5] END ccp_alpha=0.28, max_features=6, n_estimators=3;, score=(train=-967357673.215, test=-3555906645.082) total time=   0.1s
[CV 1/5] END ccp_alpha=0.28, max_features=6, n_estimators=10;, score=(train=-527084994.187, test=-2910920344.750) total time=   0.7s
[CV 2/5] END ccp_alpha=0.28, max_features=6, n_estimators=10;, score=(train=-553985886.526, test=-2854905583.515) total time=   0.9s
[CV 3/5] END ccp_alpha=0.28, max_features=6, n_estimators=10;, score=(trai

[CV 3/5] END ccp_alpha=0.3, max_features=6, n_estimators=3;, score=(train=-991436446.761, test=-3608420516.005) total time=   0.2s
[CV 4/5] END ccp_alpha=0.3, max_features=6, n_estimators=3;, score=(train=-954879303.277, test=-3911755259.843) total time=   0.2s
[CV 5/5] END ccp_alpha=0.3, max_features=6, n_estimators=3;, score=(train=-1011381954.858, test=-3749869642.968) total time=   0.1s
[CV 1/5] END ccp_alpha=0.3, max_features=6, n_estimators=10;, score=(train=-530597848.757, test=-2861711678.771) total time=   0.7s
[CV 2/5] END ccp_alpha=0.3, max_features=6, n_estimators=10;, score=(train=-534393759.113, test=-2721524073.547) total time=   0.7s
[CV 3/5] END ccp_alpha=0.3, max_features=6, n_estimators=10;, score=(train=-554573816.366, test=-2840335481.718) total time=   0.8s
[CV 4/5] END ccp_alpha=0.3, max_features=6, n_estimators=10;, score=(train=-525527319.113, test=-2932867319.055) total time=   0.8s
[CV 5/5] END ccp_alpha=0.3, max_features=6, n_estimators=10;, score=(train=-52

[CV 5/5] END ccp_alpha=0.32, max_features=6, n_estimators=3;, score=(train=-1030603963.545, test=-3770947586.939) total time=   0.2s
[CV 1/5] END ccp_alpha=0.32, max_features=6, n_estimators=10;, score=(train=-528057762.530, test=-2936796999.657) total time=   0.8s
[CV 2/5] END ccp_alpha=0.32, max_features=6, n_estimators=10;, score=(train=-545967917.788, test=-2738709526.150) total time=   0.8s
[CV 3/5] END ccp_alpha=0.32, max_features=6, n_estimators=10;, score=(train=-530720264.815, test=-2804675468.819) total time=   0.8s
[CV 4/5] END ccp_alpha=0.32, max_features=6, n_estimators=10;, score=(train=-532967225.378, test=-2923069266.074) total time=   0.7s
[CV 5/5] END ccp_alpha=0.32, max_features=6, n_estimators=10;, score=(train=-533352979.310, test=-2981657071.558) total time=   0.8s
[CV 1/5] END ccp_alpha=0.32, max_features=6, n_estimators=30;, score=(train=-406568705.755, test=-2681789741.482) total time=   2.5s
[CV 2/5] END ccp_alpha=0.32, max_features=6, n_estimators=30;, score=

[CV 2/5] END ccp_alpha=0.34, max_features=6, n_estimators=10;, score=(train=-550935282.365, test=-2805061420.419) total time=   0.8s
[CV 3/5] END ccp_alpha=0.34, max_features=6, n_estimators=10;, score=(train=-546188157.892, test=-2838369266.672) total time=   0.8s
[CV 4/5] END ccp_alpha=0.34, max_features=6, n_estimators=10;, score=(train=-538952280.486, test=-2930066575.096) total time=   0.8s
[CV 5/5] END ccp_alpha=0.34, max_features=6, n_estimators=10;, score=(train=-526519026.515, test=-2925825877.839) total time=   0.8s
[CV 1/5] END ccp_alpha=0.34, max_features=6, n_estimators=30;, score=(train=-403985691.674, test=-2636758962.668) total time=   2.6s
[CV 2/5] END ccp_alpha=0.34, max_features=6, n_estimators=30;, score=(train=-417731234.469, test=-2615191057.253) total time=   2.5s
[CV 3/5] END ccp_alpha=0.34, max_features=6, n_estimators=30;, score=(train=-410299963.653, test=-2552411133.144) total time=   2.6s
[CV 4/5] END ccp_alpha=0.34, max_features=6, n_estimators=30;, score=

[CV 4/5] END ccp_alpha=0.36, max_features=6, n_estimators=10;, score=(train=-542477417.804, test=-3032706510.007) total time=   0.7s
[CV 5/5] END ccp_alpha=0.36, max_features=6, n_estimators=10;, score=(train=-531322536.945, test=-3074435087.302) total time=   0.8s
[CV 1/5] END ccp_alpha=0.36, max_features=6, n_estimators=30;, score=(train=-397751452.856, test=-2650817308.622) total time=   2.4s
[CV 2/5] END ccp_alpha=0.36, max_features=6, n_estimators=30;, score=(train=-416021178.533, test=-2565874328.626) total time=   2.5s
[CV 3/5] END ccp_alpha=0.36, max_features=6, n_estimators=30;, score=(train=-417465737.460, test=-2620649109.592) total time=   2.5s
[CV 4/5] END ccp_alpha=0.36, max_features=6, n_estimators=30;, score=(train=-395378166.994, test=-2716371477.721) total time=   2.5s
[CV 5/5] END ccp_alpha=0.36, max_features=6, n_estimators=30;, score=(train=-410410985.641, test=-2690827435.591) total time=   2.6s
[CV 1/5] END ccp_alpha=0.36, max_features=8, n_estimators=3;, score=(

[CV 1/5] END ccp_alpha=0.38, max_features=6, n_estimators=30;, score=(train=-416438732.004, test=-2667679987.843) total time=   2.7s
[CV 2/5] END ccp_alpha=0.38, max_features=6, n_estimators=30;, score=(train=-419768824.059, test=-2547677341.909) total time=   2.5s
[CV 3/5] END ccp_alpha=0.38, max_features=6, n_estimators=30;, score=(train=-423627929.501, test=-2657012907.595) total time=   2.5s
[CV 4/5] END ccp_alpha=0.38, max_features=6, n_estimators=30;, score=(train=-423394460.213, test=-2749758175.875) total time=   2.5s
[CV 5/5] END ccp_alpha=0.38, max_features=6, n_estimators=30;, score=(train=-418064526.215, test=-2729670269.805) total time=   2.6s
[CV 1/5] END ccp_alpha=0.38, max_features=8, n_estimators=3;, score=(train=-962960309.959, test=-3601372272.326) total time=   0.2s
[CV 2/5] END ccp_alpha=0.38, max_features=8, n_estimators=3;, score=(train=-922373769.960, test=-3538096448.745) total time=   0.2s
[CV 3/5] END ccp_alpha=0.38, max_features=8, n_estimators=3;, score=(tr

[CV 3/5] END ccp_alpha=0.4, max_features=6, n_estimators=30;, score=(train=-411876704.017, test=-2591963198.992) total time=   2.8s
[CV 4/5] END ccp_alpha=0.4, max_features=6, n_estimators=30;, score=(train=-402043919.465, test=-2673244600.006) total time=   2.5s
[CV 5/5] END ccp_alpha=0.4, max_features=6, n_estimators=30;, score=(train=-406274816.066, test=-2675867453.812) total time=   2.4s
[CV 1/5] END ccp_alpha=0.4, max_features=8, n_estimators=3;, score=(train=-970393805.117, test=-3530298226.700) total time=   0.2s
[CV 2/5] END ccp_alpha=0.4, max_features=8, n_estimators=3;, score=(train=-1000265579.802, test=-3672412452.370) total time=   0.3s
[CV 3/5] END ccp_alpha=0.4, max_features=8, n_estimators=3;, score=(train=-993532723.166, test=-3733661892.597) total time=   0.2s
[CV 4/5] END ccp_alpha=0.4, max_features=8, n_estimators=3;, score=(train=-988491159.615, test=-3734278124.177) total time=   0.2s
[CV 5/5] END ccp_alpha=0.4, max_features=8, n_estimators=3;, score=(train=-9699

[CV 5/5] END ccp_alpha=0.42, max_features=6, n_estimators=30;, score=(train=-408311592.733, test=-2718963050.121) total time=   2.5s
[CV 1/5] END ccp_alpha=0.42, max_features=8, n_estimators=3;, score=(train=-924331588.652, test=-3663650668.654) total time=   0.2s
[CV 2/5] END ccp_alpha=0.42, max_features=8, n_estimators=3;, score=(train=-967514193.805, test=-3462915266.820) total time=   0.2s
[CV 3/5] END ccp_alpha=0.42, max_features=8, n_estimators=3;, score=(train=-978213489.084, test=-3568424855.711) total time=   0.2s
[CV 4/5] END ccp_alpha=0.42, max_features=8, n_estimators=3;, score=(train=-966367988.306, test=-3734203208.359) total time=   0.2s
[CV 5/5] END ccp_alpha=0.42, max_features=8, n_estimators=3;, score=(train=-1033902594.061, test=-3842693027.211) total time=   0.3s
[CV 1/5] END ccp_alpha=0.42, max_features=8, n_estimators=10;, score=(train=-517027250.598, test=-2875507290.417) total time=   1.0s
[CV 2/5] END ccp_alpha=0.42, max_features=8, n_estimators=10;, score=(tra

[CV 2/5] END ccp_alpha=0.44, max_features=8, n_estimators=3;, score=(train=-971996286.436, test=-3570880545.262) total time=   0.2s
[CV 3/5] END ccp_alpha=0.44, max_features=8, n_estimators=3;, score=(train=-979132847.298, test=-3443355299.692) total time=   0.2s
[CV 4/5] END ccp_alpha=0.44, max_features=8, n_estimators=3;, score=(train=-1017621297.448, test=-3748911987.524) total time=   0.3s
[CV 5/5] END ccp_alpha=0.44, max_features=8, n_estimators=3;, score=(train=-935951803.798, test=-3655380401.828) total time=   0.2s
[CV 1/5] END ccp_alpha=0.44, max_features=8, n_estimators=10;, score=(train=-515074177.720, test=-2880050337.101) total time=   0.9s
[CV 2/5] END ccp_alpha=0.44, max_features=8, n_estimators=10;, score=(train=-543279423.188, test=-2886193486.171) total time=   1.0s
[CV 3/5] END ccp_alpha=0.44, max_features=8, n_estimators=10;, score=(train=-526968833.691, test=-2778416529.488) total time=   1.1s
[CV 4/5] END ccp_alpha=0.44, max_features=8, n_estimators=10;, score=(tr

[CV 4/5] END ccp_alpha=0.46, max_features=8, n_estimators=3;, score=(train=-952110486.239, test=-3433601997.578) total time=   0.2s
[CV 5/5] END ccp_alpha=0.46, max_features=8, n_estimators=3;, score=(train=-918179110.391, test=-3700489641.712) total time=   0.2s
[CV 1/5] END ccp_alpha=0.46, max_features=8, n_estimators=10;, score=(train=-520288120.042, test=-2793725806.535) total time=   1.0s
[CV 2/5] END ccp_alpha=0.46, max_features=8, n_estimators=10;, score=(train=-527946112.612, test=-2757108615.269) total time=   1.0s
[CV 3/5] END ccp_alpha=0.46, max_features=8, n_estimators=10;, score=(train=-517082389.782, test=-2729146694.400) total time=   1.0s
[CV 4/5] END ccp_alpha=0.46, max_features=8, n_estimators=10;, score=(train=-536733428.248, test=-2855532563.220) total time=   1.0s
[CV 5/5] END ccp_alpha=0.46, max_features=8, n_estimators=10;, score=(train=-547014920.851, test=-2878376215.239) total time=   0.9s
[CV 1/5] END ccp_alpha=0.46, max_features=8, n_estimators=30;, score=(t

[CV 1/5] END ccp_alpha=0.48, max_features=8, n_estimators=10;, score=(train=-518604465.835, test=-2809548908.610) total time=   0.9s
[CV 2/5] END ccp_alpha=0.48, max_features=8, n_estimators=10;, score=(train=-538663758.021, test=-2890035628.840) total time=   1.0s
[CV 3/5] END ccp_alpha=0.48, max_features=8, n_estimators=10;, score=(train=-533479324.349, test=-2773200926.597) total time=   1.0s
[CV 4/5] END ccp_alpha=0.48, max_features=8, n_estimators=10;, score=(train=-512043101.288, test=-2848744578.404) total time=   0.9s
[CV 5/5] END ccp_alpha=0.48, max_features=8, n_estimators=10;, score=(train=-518195979.431, test=-2961887934.407) total time=   1.1s
[CV 1/5] END ccp_alpha=0.48, max_features=8, n_estimators=30;, score=(train=-404967029.883, test=-2577645491.876) total time=   3.2s
[CV 2/5] END ccp_alpha=0.48, max_features=8, n_estimators=30;, score=(train=-410309091.052, test=-2549856333.255) total time=   3.2s
[CV 3/5] END ccp_alpha=0.48, max_features=8, n_estimators=30;, score=

[CV 5/5] END bootstrap=False, ccp_alpha=0.02, max_features=3, n_estimators=10;, score=(train=-0.000, test=-2965973984.301) total time=   0.9s
[CV 1/5] END bootstrap=False, ccp_alpha=0.02, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3888602724.986) total time=   0.3s
[CV 2/5] END bootstrap=False, ccp_alpha=0.02, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3925859108.166) total time=   0.3s
[CV 3/5] END bootstrap=False, ccp_alpha=0.02, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3813252160.030) total time=   0.3s
[CV 4/5] END bootstrap=False, ccp_alpha=0.02, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3698568425.676) total time=   0.2s
[CV 5/5] END bootstrap=False, ccp_alpha=0.02, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3770377478.667) total time=   0.2s
[CV 1/5] END bootstrap=False, ccp_alpha=0.02, max_features=4, n_estimators=10;, score=(train=-0.000, test=-2845683844.645) total time=   1.1s
[CV 2/5] EN

[CV 3/5] END bootstrap=False, ccp_alpha=0.06, max_features=3, n_estimators=10;, score=(train=-0.000, test=-2928992910.973) total time=   0.8s
[CV 4/5] END bootstrap=False, ccp_alpha=0.06, max_features=3, n_estimators=10;, score=(train=-0.000, test=-3081787981.190) total time=   0.9s
[CV 5/5] END bootstrap=False, ccp_alpha=0.06, max_features=3, n_estimators=10;, score=(train=-0.000, test=-3026768308.086) total time=   0.8s
[CV 1/5] END bootstrap=False, ccp_alpha=0.06, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3824162890.296) total time=   0.2s
[CV 2/5] END bootstrap=False, ccp_alpha=0.06, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3712351852.664) total time=   0.2s
[CV 3/5] END bootstrap=False, ccp_alpha=0.06, max_features=4, n_estimators=3;, score=(train=-387095.635, test=-3605760824.268) total time=   0.2s
[CV 4/5] END bootstrap=False, ccp_alpha=0.06, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3923978051.417) total time=   0.2s
[CV 5

[CV 1/5] END bootstrap=False, ccp_alpha=0.1, max_features=3, n_estimators=10;, score=(train=-0.000, test=-3017669745.969) total time=   0.8s
[CV 2/5] END bootstrap=False, ccp_alpha=0.1, max_features=3, n_estimators=10;, score=(train=-111.984, test=-3007756339.176) total time=   0.8s
[CV 3/5] END bootstrap=False, ccp_alpha=0.1, max_features=3, n_estimators=10;, score=(train=-0.000, test=-3042356198.241) total time=   0.8s
[CV 4/5] END bootstrap=False, ccp_alpha=0.1, max_features=3, n_estimators=10;, score=(train=-0.000, test=-2993720107.184) total time=   0.8s
[CV 5/5] END bootstrap=False, ccp_alpha=0.1, max_features=3, n_estimators=10;, score=(train=-0.742, test=-3136497060.930) total time=   0.8s
[CV 1/5] END bootstrap=False, ccp_alpha=0.1, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3875820686.811) total time=   0.2s
[CV 2/5] END bootstrap=False, ccp_alpha=0.1, max_features=4, n_estimators=3;, score=(train=-0.000, test=-3578276946.000) total time=   0.2s
[CV 3/5] END 

[CV 4/5] END bootstrap=False, ccp_alpha=0.14, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3902766078.740) total time=   0.2s
[CV 5/5] END bootstrap=False, ccp_alpha=0.14, max_features=3, n_estimators=3;, score=(train=-0.000, test=-4042250223.741) total time=   0.2s
[CV 1/5] END bootstrap=False, ccp_alpha=0.14, max_features=3, n_estimators=10;, score=(train=-0.742, test=-3087800133.147) total time=   0.7s
[CV 2/5] END bootstrap=False, ccp_alpha=0.14, max_features=3, n_estimators=10;, score=(train=-2.256, test=-2883545696.599) total time=   0.7s
[CV 3/5] END bootstrap=False, ccp_alpha=0.14, max_features=3, n_estimators=10;, score=(train=-4.254, test=-3045538710.361) total time=   0.8s
[CV 4/5] END bootstrap=False, ccp_alpha=0.14, max_features=3, n_estimators=10;, score=(train=-0.000, test=-3067760471.213) total time=   0.8s
[CV 5/5] END bootstrap=False, ccp_alpha=0.14, max_features=3, n_estimators=10;, score=(train=-0.000, test=-3079296747.155) total time=   0.7s
[CV 1/5]

[CV 2/5] END bootstrap=False, ccp_alpha=0.18, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3977087560.247) total time=   0.2s
[CV 3/5] END bootstrap=False, ccp_alpha=0.18, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3961217870.807) total time=   0.2s
[CV 4/5] END bootstrap=False, ccp_alpha=0.18, max_features=3, n_estimators=3;, score=(train=-0.000, test=-4529519787.265) total time=   0.2s
[CV 5/5] END bootstrap=False, ccp_alpha=0.18, max_features=3, n_estimators=3;, score=(train=-4.206, test=-4053717477.745) total time=   0.2s
[CV 1/5] END bootstrap=False, ccp_alpha=0.18, max_features=3, n_estimators=10;, score=(train=-0.000, test=-3029790041.218) total time=   0.8s
[CV 2/5] END bootstrap=False, ccp_alpha=0.18, max_features=3, n_estimators=10;, score=(train=-0.000, test=-2829786927.328) total time=   0.8s
[CV 3/5] END bootstrap=False, ccp_alpha=0.18, max_features=3, n_estimators=10;, score=(train=-0.000, test=-2865568010.976) total time=   0.8s
[CV 4/5] E

[CV 1/5] END bootstrap=False, ccp_alpha=0.22, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3882585589.922) total time=   0.2s
[CV 2/5] END bootstrap=False, ccp_alpha=0.22, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3990817491.527) total time=   0.2s
[CV 3/5] END bootstrap=False, ccp_alpha=0.22, max_features=3, n_estimators=3;, score=(train=-28.430, test=-3842330568.425) total time=   0.2s
[CV 4/5] END bootstrap=False, ccp_alpha=0.22, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3844976038.904) total time=   0.1s
[CV 5/5] END bootstrap=False, ccp_alpha=0.22, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3908525709.320) total time=   0.1s
[CV 1/5] END bootstrap=False, ccp_alpha=0.22, max_features=3, n_estimators=10;, score=(train=-0.000, test=-2944295889.623) total time=   0.8s
[CV 2/5] END bootstrap=False, ccp_alpha=0.22, max_features=3, n_estimators=10;, score=(train=-946.325, test=-2830313494.800) total time=   0.8s
[CV 3/5]

[CV 4/5] END bootstrap=False, ccp_alpha=0.26, max_features=2, n_estimators=10;, score=(train=-0.000, test=-3395502268.795) total time=   0.7s
[CV 5/5] END bootstrap=False, ccp_alpha=0.26, max_features=2, n_estimators=10;, score=(train=-0.000, test=-3260459585.746) total time=   0.7s
[CV 1/5] END bootstrap=False, ccp_alpha=0.26, max_features=3, n_estimators=3;, score=(train=-0.000, test=-4126826965.277) total time=   0.2s
[CV 2/5] END bootstrap=False, ccp_alpha=0.26, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3695366950.678) total time=   0.2s
[CV 3/5] END bootstrap=False, ccp_alpha=0.26, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3793995412.234) total time=   0.2s
[CV 4/5] END bootstrap=False, ccp_alpha=0.26, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3792579113.372) total time=   0.2s
[CV 5/5] END bootstrap=False, ccp_alpha=0.26, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3789098426.418) total time=   0.2s
[CV 1/5] EN

[CV 2/5] END bootstrap=False, ccp_alpha=0.3, max_features=2, n_estimators=10;, score=(train=-0.000, test=-3171553087.916) total time=   0.7s
[CV 3/5] END bootstrap=False, ccp_alpha=0.3, max_features=2, n_estimators=10;, score=(train=-0.010, test=-3110927415.401) total time=   0.8s
[CV 4/5] END bootstrap=False, ccp_alpha=0.3, max_features=2, n_estimators=10;, score=(train=-0.021, test=-3289796293.052) total time=   0.6s
[CV 5/5] END bootstrap=False, ccp_alpha=0.3, max_features=2, n_estimators=10;, score=(train=-0.000, test=-3121972238.127) total time=   0.6s
[CV 1/5] END bootstrap=False, ccp_alpha=0.3, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3614139887.987) total time=   0.2s
[CV 2/5] END bootstrap=False, ccp_alpha=0.3, max_features=3, n_estimators=3;, score=(train=-8.244, test=-4062971271.021) total time=   0.2s
[CV 3/5] END bootstrap=False, ccp_alpha=0.3, max_features=3, n_estimators=3;, score=(train=-0.000, test=-4015256880.552) total time=   0.2s
[CV 4/5] END boo

[CV 1/5] END bootstrap=False, ccp_alpha=0.34, max_features=2, n_estimators=10;, score=(train=-0.000, test=-3245821745.983) total time=   0.4s
[CV 2/5] END bootstrap=False, ccp_alpha=0.34, max_features=2, n_estimators=10;, score=(train=-1641.563, test=-3138388978.269) total time=   0.3s
[CV 3/5] END bootstrap=False, ccp_alpha=0.34, max_features=2, n_estimators=10;, score=(train=-0.000, test=-3051567500.440) total time=   0.4s
[CV 4/5] END bootstrap=False, ccp_alpha=0.34, max_features=2, n_estimators=10;, score=(train=-33.450, test=-3351018346.118) total time=   0.4s
[CV 5/5] END bootstrap=False, ccp_alpha=0.34, max_features=2, n_estimators=10;, score=(train=-948.090, test=-3239022794.228) total time=   0.4s
[CV 1/5] END bootstrap=False, ccp_alpha=0.34, max_features=3, n_estimators=3;, score=(train=-0.000, test=-4022959013.713) total time=   0.1s
[CV 2/5] END bootstrap=False, ccp_alpha=0.34, max_features=3, n_estimators=3;, score=(train=-0.000, test=-3750068865.270) total time=   0.1s
[C

[CV 4/5] END bootstrap=False, ccp_alpha=0.38, max_features=2, n_estimators=3;, score=(train=-4.290, test=-4447269983.496) total time=   0.0s
[CV 5/5] END bootstrap=False, ccp_alpha=0.38, max_features=2, n_estimators=3;, score=(train=-4.710, test=-4143830164.942) total time=   0.0s
[CV 1/5] END bootstrap=False, ccp_alpha=0.38, max_features=2, n_estimators=10;, score=(train=-4.942, test=-3123452825.942) total time=   0.4s
[CV 2/5] END bootstrap=False, ccp_alpha=0.38, max_features=2, n_estimators=10;, score=(train=-1529.859, test=-3154630123.289) total time=   0.5s
[CV 3/5] END bootstrap=False, ccp_alpha=0.38, max_features=2, n_estimators=10;, score=(train=-2.182, test=-3215563914.505) total time=   0.4s
[CV 4/5] END bootstrap=False, ccp_alpha=0.38, max_features=2, n_estimators=10;, score=(train=-2.238, test=-3322562803.247) total time=   0.4s
[CV 5/5] END bootstrap=False, ccp_alpha=0.38, max_features=2, n_estimators=10;, score=(train=-2.429, test=-3001982156.666) total time=   0.4s
[CV 1

[CV 4/5] END bootstrap=False, ccp_alpha=0.42, max_features=2, n_estimators=3;, score=(train=-5.299, test=-4418289785.100) total time=   0.1s
[CV 5/5] END bootstrap=False, ccp_alpha=0.42, max_features=2, n_estimators=3;, score=(train=-5.257, test=-4107648765.246) total time=   0.1s
[CV 1/5] END bootstrap=False, ccp_alpha=0.42, max_features=2, n_estimators=10;, score=(train=-1221.576, test=-3233227699.645) total time=   0.4s
[CV 2/5] END bootstrap=False, ccp_alpha=0.42, max_features=2, n_estimators=10;, score=(train=-2.225, test=-3074778364.574) total time=   0.4s
[CV 3/5] END bootstrap=False, ccp_alpha=0.42, max_features=2, n_estimators=10;, score=(train=-23.823, test=-3189178882.856) total time=   0.4s
[CV 4/5] END bootstrap=False, ccp_alpha=0.42, max_features=2, n_estimators=10;, score=(train=-3787.018, test=-3509121481.127) total time=   0.3s
[CV 5/5] END bootstrap=False, ccp_alpha=0.42, max_features=2, n_estimators=10;, score=(train=-2.059, test=-3316079259.284) total time=   0.5s
[

[CV 2/5] END bootstrap=False, ccp_alpha=0.46, max_features=2, n_estimators=3;, score=(train=-3.407, test=-4042224944.172) total time=   0.1s
[CV 3/5] END bootstrap=False, ccp_alpha=0.46, max_features=2, n_estimators=3;, score=(train=-3.953, test=-4038044743.755) total time=   0.1s
[CV 4/5] END bootstrap=False, ccp_alpha=0.46, max_features=2, n_estimators=3;, score=(train=-4.206, test=-4676242276.115) total time=   0.1s
[CV 5/5] END bootstrap=False, ccp_alpha=0.46, max_features=2, n_estimators=3;, score=(train=-4.458, test=-4254152955.656) total time=   0.1s
[CV 1/5] END bootstrap=False, ccp_alpha=0.46, max_features=2, n_estimators=10;, score=(train=-238.527, test=-3351973018.794) total time=   0.4s
[CV 2/5] END bootstrap=False, ccp_alpha=0.46, max_features=2, n_estimators=10;, score=(train=-2.227, test=-3180691781.944) total time=   0.4s
[CV 3/5] END bootstrap=False, ccp_alpha=0.46, max_features=2, n_estimators=10;, score=(train=-1.894, test=-3131470466.364) total time=   0.4s
[CV 4/5]

[CV 5/5] END bootstrap=False, ccp_alpha=0.48, max_features=4, n_estimators=10;, score=(train=-3.665, test=-2845836125.922) total time=   0.7s


In [13]:
## Best params

grid_search.best_params_ 

{'ccp_alpha': 0.16, 'max_features': 8, 'n_estimators': 30}

In [15]:
## best estimator so that we don't need to build the model again
# provided that GridSearchCV is initialized with `refit=True` i.e. by default

# final tuned model
forest_mod = grid_search.best_estimator_
forest_mod

#### => Since `refit=True` by default, the best estimator above has been already retrained on the whole training set.

In [22]:
## best estimator score

np.sqrt(-grid_search.best_score_)

50734.354718569

In [20]:
## CV results
cv_results = grid_search.cv_results_

## In a formatted manner
for mean_score, params in zip(cv_results["mean_test_score"], cv_results["params"]):
    print(np.sqrt(-mean_score), params)

67076.24179891721 {'ccp_alpha': 0.0, 'max_features': 2, 'n_estimators': 3}
58014.04102367006 {'ccp_alpha': 0.0, 'max_features': 2, 'n_estimators': 10}
54964.777918894535 {'ccp_alpha': 0.0, 'max_features': 2, 'n_estimators': 30}
63697.69399641872 {'ccp_alpha': 0.0, 'max_features': 4, 'n_estimators': 3}
55090.72535349553 {'ccp_alpha': 0.0, 'max_features': 4, 'n_estimators': 10}
52246.11850363178 {'ccp_alpha': 0.0, 'max_features': 4, 'n_estimators': 30}
60988.27741976439 {'ccp_alpha': 0.0, 'max_features': 6, 'n_estimators': 3}
53697.35222523012 {'ccp_alpha': 0.0, 'max_features': 6, 'n_estimators': 10}
51663.991089948235 {'ccp_alpha': 0.0, 'max_features': 6, 'n_estimators': 30}
60880.85565058071 {'ccp_alpha': 0.0, 'max_features': 8, 'n_estimators': 3}
53134.22232274666 {'ccp_alpha': 0.0, 'max_features': 8, 'n_estimators': 10}
50924.524275355936 {'ccp_alpha': 0.0, 'max_features': 8, 'n_estimators': 30}
66584.5174122614 {'ccp_alpha': 0.02, 'max_features': 2, 'n_estimators': 3}
57792.36771648

### # Inspecting the contribution weightage of each feature for the best estimator:

In [24]:
## feature importances (Contribution of each feature)

feature_importances = grid_search.best_estimator_.feature_importances_

In [26]:
## Importing a class contributing to the fullpipeline

from CombinedAttributeAdder import CombinedAttributesAdder2

In [27]:
## loading the transformation pipeline
import pickle

full_pipeline = pickle.load(open("transformation_pipeline.pkl", "rb"))
full_pipeline

In [28]:
cat_encoder = full_pipeline.named_transformers_['cat']
cat_1hot_atts = cat_encoder.categories_[0]
cat_1hot_atts

array(['<1H OCEAN', 'INLAND', 'ISLAND', 'NEAR BAY', 'NEAR OCEAN'],
      dtype=object)

In [29]:
extras = ["rooms_per_hhold", "pop_per_hhold", "bedrooms_per_room"]

atts = list(X.drop(columns=["ocean_proximity"]).columns) + extras + list(cat_1hot_atts)
atts

['longitude',
 'latitude',
 'housing_median_age',
 'total_rooms',
 'total_bedrooms',
 'population',
 'households',
 'median_income',
 'rooms_per_hhold',
 'pop_per_hhold',
 'bedrooms_per_room',
 '<1H OCEAN',
 'INLAND',
 'ISLAND',
 'NEAR BAY',
 'NEAR OCEAN']

In [30]:
## Contribution of each feature towards the model

sorted(zip(feature_importances, atts), reverse=True)

[(0.3815417899839492, 'median_income'),
 (0.1516016774062897, 'INLAND'),
 (0.09808977131892661, 'longitude'),
 (0.08327942693645117, 'latitude'),
 (0.04969287989194938, 'rooms_per_hhold'),
 (0.04756738401831011, 'bedrooms_per_room'),
 (0.044787455139893614, 'housing_median_age'),
 (0.03632463277882869, 'pop_per_hhold'),
 (0.029084586233724365, 'population'),
 (0.020781070409457324, 'total_bedrooms'),
 (0.020738880671857644, 'total_rooms'),
 (0.019960477145649306, 'households'),
 (0.008566934053759785, '<1H OCEAN'),
 (0.0054956133628834974, 'NEAR OCEAN'),
 (0.0024463868317928287, 'NEAR BAY'),
 (4.103381627667079e-05, 'ISLAND')]

In [31]:
df_prepared = pd.DataFrame(X_prepared, columns=atts)
df_prepared.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,rooms_per_hhold,pop_per_hhold,bedrooms_per_room,<1H OCEAN,INLAND,ISLAND,NEAR BAY,NEAR OCEAN
0,-0.94135,1.347438,0.027564,0.584777,0.640371,0.732602,0.556286,-0.893647,-0.121122,0.017395,-0.121122,0.0,1.0,0.0,0.0,0.0
1,1.171782,-1.19244,-1.722018,1.261467,0.781561,0.533612,0.721318,1.292168,-0.810867,0.569256,-0.810867,0.0,0.0,0.0,0.0,1.0
2,0.267581,-0.125972,1.22046,-0.469773,-0.545138,-0.674675,-0.524407,-0.525434,-0.338273,-0.018024,-0.338273,0.0,1.0,0.0,0.0,0.0
3,1.221738,-1.351474,-0.370069,-0.348652,-0.036367,-0.467617,-0.037297,-0.865929,0.961205,-0.59514,0.961205,0.0,0.0,0.0,0.0,1.0
4,0.437431,-0.635818,-0.131489,0.427179,0.27279,0.37406,0.220898,0.325752,-0.474513,0.251241,-0.474513,1.0,0.0,0.0,0.0,0.0


In [32]:
## mundane columns: features I decided to drop and retrain the model

mundane_cols = ["total_bedrooms", "total_rooms", "households", "NEAR OCEAN", "<1H OCEAN",
                "NEAR BAY", "ISLAND"]

## # Evaluating the model on the test set:

In [33]:
## load the test set

test_set = pd.read_csv("strat_test_set.csv")
test_set.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-118.39,34.12,29.0,6447.0,1012.0,2184.0,960.0,8.2816,500001.0,<1H OCEAN
1,-120.42,34.89,24.0,2020.0,307.0,855.0,283.0,5.0099,162500.0,<1H OCEAN
2,-118.45,34.25,36.0,1453.0,270.0,808.0,275.0,4.3839,204600.0,<1H OCEAN
3,-118.1,33.91,35.0,1653.0,325.0,1072.0,301.0,3.2708,159700.0,<1H OCEAN
4,-117.07,32.77,38.0,3779.0,614.0,1495.0,614.0,4.3529,184000.0,NEAR OCEAN


In [34]:
## test features and labels

X_test = test_set.drop(columns=["median_house_value"])
Y_test = test_set["median_house_value"].copy()

In [35]:
## transformation of test features

X_test_prepared = full_pipeline.transform(X_test)

In [49]:
## Evaluation of the final model on the TEST SET

final_acc = forest_mod.score(X_test_prepared, Y_test)
final_acc

0.8186615323589788

#### => This is seriously someting! `:D`

In [46]:
## rmse

final_preds = forest_mod.predict(X_test_prepared)
final_rmse = np.sqrt(mean_squared_error(Y_test, final_preds))
final_rmse

48614.93059260003

In [50]:
report["forest_mod (finalized)"] = {"r2": final_acc,"rmse": final_rmse}

report

{'mod1 (Linear Regression)': {'r2': 0.648079515380494,
  'rmse': 68635.27064635929},
 'mod2 (DescisionTreeRegressor)': {'r2': 1.0, 'rmse': 0.0},
 'mod2.1 (dt cv)': {'rmse': 69620.91130100285},
 'mod1.1 (lr cv)': {'rmse': 69027.45964364048},
 'mod3 (RandomForestRegressor)': {'rmse': 18490.47999527855},
 'mod3.1 (rf cv)': {'rmse': 49720.022020119024},
 'forest_mod (finalized)': {'r2': 0.8186615323589788,
  'rmse': 48614.93059260003}}

#### => too good to be true!

### # Saving the final model:

In [42]:
## saving the final model

import pickle

pickle.dump(forest_mod, open("model.pkl", "wb"))

### # Creating a `Confidence Interval`:

#### => To account for precision, we should create a 95% confidence interval for generalization error.

In [54]:
np.sqrt((final_preds - Y_test)**2).mean()

32433.56773594207

In [55]:
final_rmse

48614.93059260003

In [56]:
confidence = .95
squared_errors = (final_preds - Y_test)**2
np.sqrt( stats.t.interval(confidence, len(squared_errors-1), loc=squared_errors.mean(),
                         scale=stats.sem(squared_errors)) )

array([46633.30880693, 50518.88223985])