In [27]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV

In [28]:
data = pd.read_csv("car-sales-extended-missing-data.csv")
data

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Honda,White,35431.0,4.0,15323.0
1,BMW,Blue,192714.0,5.0,19943.0
2,Honda,White,84714.0,4.0,28343.0
3,Toyota,White,154365.0,4.0,13434.0
4,Nissan,Blue,181577.0,3.0,14043.0
...,...,...,...,...,...
995,Toyota,Black,35820.0,4.0,32042.0
996,,White,155144.0,3.0,5716.0
997,Nissan,Blue,66604.0,4.0,31570.0
998,Honda,White,215883.0,4.0,4001.0


In [29]:
data.isna().sum()

Make             49
Colour           50
Odometer (KM)    50
Doors            50
Price            50
dtype: int64

In [30]:
data = data.dropna(subset='Price')

In [31]:
data.isna().sum()

Make             47
Colour           46
Odometer (KM)    48
Doors            47
Price             0
dtype: int64

In [49]:
categorical_features = ['Make', 'Colour']
categorical_transform = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy='constant', fill_value='missing')),
    ("one_hot", OneHotEncoder(handle_unknown='ignore'))
])

In [50]:
door_feature = ['Doors']
door_transform = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy='constant', fill_value=4))
])

In [51]:
numerical_feature = ['Odometer (KM)']
numerical_transform = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy='mean'))
])

In [52]:
X = data.drop('Price', axis=1)
y = data['Price']

In [53]:
transformer = ColumnTransformer(transformers=[
    ("categorical", categorical_transform, categorical_features),
    ("door", door_transform, door_feature),
    ("numerical", numerical_transform, numerical_feature)
])

In [54]:
X_transform = transformer.fit_transform(X)

In [57]:
X_transform_df = pd.DataFrame(X_transform)
X_transform_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4.0,35431.0
1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.0,192714.0
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4.0,84714.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4.0,154365.0
4,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,181577.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
945,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,35820.0
946,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,155144.0
947,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0,66604.0
948,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4.0,215883.0


In [37]:
model = RandomForestRegressor(n_jobs=-1)

In [38]:
def evaluation_metrics(y_true, y_preds):
    r2_squared = r2_score(y_true, y_preds)
    MAE = mean_absolute_error(y_true, y_preds)
    MSE = mean_squared_error(y_true, y_preds)
    
    metrics = {"R2-Square" : round(r2_squared, 2),
               "Mean-Absolute-Error" : round(MAE, 2),
               "Mean-Squared-Error" : round(MSE, 2)}
    
    print(f"R2-Squared is {r2_squared*100:.2f}%")
    print(f"Mean-Absolute-Error is {MAE:.2f}")
    print(f"Mean-Squared-Error is {MSE:.2f}")
    
    return metrics

In [39]:
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 1.0,
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [40]:
param = {"n_estimators" : [10, 100, 200, 250],
         "max_depth" : [None, 2, 3, 5],
         "max_features" : [None, 'sqrt', 'log2'],
         "min_samples_split" : [2, 3, 4],
         "min_samples_leaf" : [1, 2, 3]}

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X_transform_df, y, test_size=0.2, random_state=5)

In [59]:
grid_cv = GridSearchCV(estimator=model, param_grid=param, cv=5, verbose=2)

In [61]:
grid_cv.fit(X_train, y_train)

Fitting 5 folds for each of 432 candidates, totalling 2160 fits
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   6.0s
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   1.9s
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, max_features=None, min_samples_leaf=1, min_samples_split=2, n_

[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=None,

[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.6s
[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.6s
[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.6s
[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.6s
[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, ma

[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, m

[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.6s
[CV] END max_depth=None,

[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_

[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=None,

[CV] END max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.6s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.7s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.6s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.6s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=None, ma

[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=None, min_samp

[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.6s
[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=None, min_samp

[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=None, min_samples_leaf=3, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=None, min_samp

[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=sqrt, min_samp

[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=2, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=sqrt, min_sampl

[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=200; total time=   0.9s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=sqrt, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=log2, min_sam

[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=log2, min_sampl

[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=log2, min_samples_leaf=3, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=2, max_features=log2, min_sam

[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=1, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=None, min_sampl

[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=None, min_samples_leaf=2, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=None, min_sam

[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=None, min_samples_leaf=3, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=None, min_samp

[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=1, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=sqrt, min_samp

[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=sqrt, min_samples_leaf=3, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=sqrt, min_sampl

[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=1, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_sam

[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=2, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=log2, min_samp

[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=3, n_estimators=250; total time=   0.6s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=3, n_estimators=250; total time=   0.6s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=3, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=3, max_features=log2, min_samp

[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=None, min_samples_leaf=1, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=None, min_samp

[CV] END max_depth=5, max_features=None, min_samples_leaf=2, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=None, min_samples_leaf=2, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=None, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=None, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=None, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.6s
[CV] END max_depth=5, max_features=None, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=None, min_samples_leaf=2, min_samples_split=4, n_estimators=250; total time=   0.6s
[CV] END max_depth=5, max_features=None, min_samples_leaf=3, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=5, max_features=None, min_samp

[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.0s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=sqrt, min_sampl

[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.6s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.6s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=sqrt, min_sam

[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=10; total time=   0.0s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=sqrt, min_samples_leaf=3, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=sqrt, min_samp

[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=3, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=3, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=1, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=5, max_features=log2, min_samp

[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=10; total time=   0.0s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=2, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=log2, min_samp

[CV] END max_depth=5, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=200; total time=   0.4s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.6s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.5s
[CV] END max_depth=5, max_features=log2, min_samples_leaf=3, min_samples_split=4, n_estimators=250; total time=   0.6s


In [62]:
y_preds = grid_cv.predict(X_test)

In [63]:
evaluation_metrics(y_test, y_preds)

R2-Squared is 36.72%
Mean-Absolute-Error is 5907.09
Mean-Squared-Error is 51567145.04


{'R2-Square': 0.37,
 'Mean-Absolute-Error': 5907.09,
 'Mean-Squared-Error': 51567145.04}