#### Hyperparameter optimization methods
##### Grid search
##### Random search
##### Bayesian optimization
##### Gradient-based optimization
##### Evolutionary optimization
##### Population based

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv("insurance.csv")

In [3]:
print(data.head(5))

   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520


In [4]:
data.shape

(1338, 7)

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1333 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [6]:
count_nan = data.isnull().sum() # the number of missing values for every column
print(count_nan[count_nan > 0])

bmi    5
dtype: int64


In [7]:
print(data[data['bmi'].isna()])

     age     sex  bmi  children smoker     region      charges
14    27    male  NaN         0    yes  southeast  39611.75770
271   50    male  NaN         2    yes  southwest  42856.83800
421   61    male  NaN         0    yes  southeast  46599.10840
545   49    male  NaN         2    yes  northwest  23807.24060
727   29  female  NaN         1    yes  northeast  16657.71745


In [8]:
data['bmi'].fillna(data['bmi'].mean(), inplace = True)
#check how many values are missing (NaN) - after we filled in the NaN
count_nan = data.isnull().sum() # the number of missing values for every column
print(count_nan[count_nan > 0])

Series([], dtype: int64)


In [9]:
data['region'].unique()

array(['southwest', 'southeast', 'northwest', 'northeast'], dtype=object)

In [10]:
data = pd.get_dummies(data, columns=['region'])
data.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,charges,region_northeast,region_northwest,region_southeast,region_southwest
0,19,female,27.9,0,yes,16884.924,0,0,0,1
1,18,male,33.77,1,no,1725.5523,0,0,1,0
2,28,male,33.0,3,no,4449.462,0,0,1,0
3,33,male,22.705,0,no,21984.47061,0,1,0,0
4,32,male,28.88,0,no,3866.8552,0,1,0,0


In [11]:
le = LabelEncoder()
data['sex'] = le.fit_transform(data['sex'])
data['smoker'] = le.fit_transform(data['smoker'])


In [12]:
data.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,charges,region_northeast,region_northwest,region_southeast,region_southwest
0,19,0,27.9,0,1,16884.924,0,0,0,1
1,18,1,33.77,1,0,1725.5523,0,0,1,0
2,28,1,33.0,3,0,4449.462,0,0,1,0
3,33,1,22.705,0,0,21984.47061,0,1,0,0
4,32,1,28.88,0,0,3866.8552,0,1,0,0


In [13]:
data.describe()

Unnamed: 0,age,sex,bmi,children,smoker,charges,region_northeast,region_northwest,region_southeast,region_southwest
count,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0
mean,39.207025,0.505232,30.658545,1.094918,0.204783,13270.422265,0.242152,0.2429,0.272048,0.2429
std,14.04996,0.50016,6.081382,1.205493,0.403694,12110.011237,0.428546,0.428995,0.445181,0.428995
min,18.0,0.0,15.96,0.0,0.0,1121.8739,0.0,0.0,0.0,0.0
25%,27.0,0.0,26.315,0.0,0.0,4740.28715,0.0,0.0,0.0,0.0
50%,39.0,1.0,30.4,1.0,0.0,9382.033,0.0,0.0,0.0,0.0
75%,51.0,1.0,34.6,2.0,0.0,16639.912515,0.0,0.0,1.0,0.0
max,64.0,1.0,53.13,5.0,1.0,63770.42801,1.0,1.0,1.0,1.0


In [14]:
 X = data.drop(['charges'], axis=1)
# the dependent variable
y = data[['charges']]

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0 )

In [16]:
#standard scaler (fit transform on train, transform only on test)
#no of unique features may be different between training and test.
#So we fit transform training set and only transform test.
s_scaler = StandardScaler()
X_train = s_scaler.fit_transform(X_train.astype(np.float))
X_test= s_scaler.transform(X_test.astype(np.float))

#### Linear Regression

In [17]:
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)
y_train_pred = regression_model.predict(X_train)
y_test_pred = regression_model.predict(X_test)

In [18]:
for idx, col_name in enumerate(X.columns):
    print("The coefficient for {} is {}".format(col_name, regression_model.coef_[0][idx]))
intercept = regression_model.intercept_[0]
print("The intercept for our model is {}".format(intercept))

The coefficient for age is 3624.3635619676597
The coefficient for sex is -44.54996175287154
The coefficient for bmi is 1966.9047392665138
The coefficient for children is 661.3560344704907
The coefficient for smoker is 9310.54961688836
The coefficient for region_northeast is 242.57758421728334
The coefficient for region_northwest is -29.49212714621778
The coefficient for region_southeast is -104.19142494776419
The coefficient for region_southwest is -99.14488062641392
The intercept for our model is 13141.350831640624


In [19]:
#The score (R^2) for in-sample and out of sample
print(regression_model.score(X_train, y_train))
print(regression_model.score(X_test, y_test)) #out of sample

0.7283337653139448
0.7855951871694039


#### Polynomial Regression

In [20]:
poly = PolynomialFeatures(degree = 3)
X_poly = poly.fit_transform(X)

In [21]:
X_train,X_test,y_train,y_test = train_test_split(X_poly,y, test_size = 0.33, random_state = 0)

#standard scaler (fit transform on train, transform only on test)
s_scaler = StandardScaler()
X_train = s_scaler.fit_transform(X_train.astype(np.float))
X_test= s_scaler.transform(X_test.astype(np.float))

In [22]:
poly_regressor = LinearRegression()
poly_regressor.fit(X_train, y_train)
y_train_pred = poly_regressor.predict(X_train)


In [23]:
print(poly_regressor.score(X_train, y_train))
print(poly_regressor.score(X_test, y_test))


0.8361438495861537
0.8469519251552219


#### Differences between Flatten() and Ravel()

##### a.ravel():
##### (i) Return only reference/view of original array
##### (ii) If you modify the array you would notice that the value of original array also changes.
##### (iii) Ravel is faster than flatten() as it does not occupy any memory.
##### (iv) Ravel is a library-level function.

##### a.flatten() :
##### (i) Return copy of original array
##### (ii) If you modify any value of this array value of original array is not affected.
##### (iii) Flatten() is comparatively slower than ravel() as it occupies memory.
##### (iv) Flatten is a method of an ndarray object.

#### Support Vector Regression

In [24]:

svr = SVR(kernel='linear', C = 300)

#test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0 )

#standard scaler (fit transform on train, fit only on test)
sc = StandardScaler()
X_train = sc.fit_transform(X_train.astype(np.float))
X_test= sc.transform(X_test.astype(np.float))

#fit model
svr = svr.fit(X_train,y_train.values.ravel())
y_train_pred = svr.predict(X_train)
y_test_pred = svr.predict(X_test)

#print score
print('svr train score %.3f, svr test score: %.3f' % (
svr.score(X_train,y_train),
svr.score(X_test, y_test)))

svr train score 0.598, svr test score: 0.628


#### Decion Tree 

In [25]:

dt = DecisionTreeRegressor(random_state=0)

#test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0 )

#standard scaler (fit transform on train, fit only on test)
sc = StandardScaler()
X_train = sc.fit_transform(X_train.astype(np.float))
X_test= sc.transform(X_test.astype(np.float))


#fit model
dt = dt.fit(X_train,y_train.values.ravel())
y_train_pred = dt.predict(X_train)
y_test_pred = dt.predict(X_test)

#print score
print('dt train score %.3f, dt test score: %.3f' % (
dt.score(X_train,y_train),
dt.score(X_test, y_test)))

dt train score 0.999, dt test score: 0.717


#### Random Forest Regression

In [26]:
forest = RandomForestRegressor(n_estimators = 100,
                              criterion = 'mse',
                              random_state = 1,
                              n_jobs = -1)
#test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0 )

#standard scaler (fit transform on train, fit only on test)
sc = StandardScaler()
X_train = sc.fit_transform(X_train.astype(np.float))
X_test= sc.transform(X_test.astype(np.float))

#fit model
forest.fit(X_train,y_train.values.ravel())
y_train_pred = forest.predict(X_train)
y_test_pred = forest.predict(X_test)

#print score
print('forest train score %.3f, forest test score: %.3f' % (
forest.score(X_train,y_train),
forest.score(X_test, y_test)))

forest train score 0.973, forest test score: 0.859


#### SVR parameter grid

In [27]:
#Function to print best hyperparamaters: 
def print_best_params(gd_model):
    param_dict = gd_model.best_estimator_.get_params()
    print (str(gd_model.estimator))
    model_str = str(gd_model.estimator).split('(')[0]
    print("\n*** {} Best Parameters ***".format(model_str))
    for k in param_dict:
        print("{}: {}".format(k, param_dict[k]))
    print()

#test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0 )

#standard scaler (fit transform on train, fit only on test)
sc = StandardScaler()
X_train = sc.fit_transform(X_train.astype(np.float))
X_test= sc.transform(X_test.astype(np.float))

###Challenge 1: SVR parameter grid###
param_grid_svr = dict(kernel=[ 'linear', 'poly'],
                     degree=[2],
                     C=[600, 700, 800, 900],
                     epsilon=[0.0001, 0.00001, 0.000001])
svr = GridSearchCV(SVR(), param_grid=param_grid_svr, cv=5, verbose=3)


#fit model
svr = svr.fit(X_train,y_train.values.ravel())

#print score
print('\n\nsvr train score %.3f, svr test score: %.3f' % (
svr.score(X_train,y_train),
svr.score(X_test, y_test)))
#print(svr.best_estimator_.get_params())

print_best_params(svr)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] C=600, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=linear, score=0.671, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=linear, score=0.663, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=linear, score=0.571, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=linear, score=0.636, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=linear, score=0.555, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=poly ....................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=poly, score=0.485, total=   0.0s
[CV] C=600, degree=2, epsilo

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV]  C=600, degree=2, epsilon=0.0001, kernel=poly, score=0.336, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=poly ....................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=poly, score=0.278, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=poly ....................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=poly, score=0.372, total=   0.0s
[CV] C=600, degree=2, epsilon=0.0001, kernel=poly ....................
[CV]  C=600, degree=2, epsilon=0.0001, kernel=poly, score=0.222, total=   0.0s
[CV] C=600, degree=2, epsilon=1e-05, kernel=linear ...................
[CV]  C=600, degree=2, epsilon=1e-05, kernel=linear, score=0.671, total=   0.0s
[CV] C=600, degree=2, epsilon=1e-05, kernel=linear ...................
[CV]  C=600, degree=2, epsilon=1e-05, kernel=linear, score=0.663, total=   0.0s
[CV] C=600, degree=2, epsilon=1e-05, kernel=linear ...................
[CV]  C=600, degree=2, epsilon=1e-05, kernel=linear, score=0.571, total=   0.0s
[CV] C=600, degree

[CV]  C=800, degree=2, epsilon=0.0001, kernel=linear, score=0.621, total=   0.0s
[CV] C=800, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=800, degree=2, epsilon=0.0001, kernel=linear, score=0.572, total=   0.0s
[CV] C=800, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=800, degree=2, epsilon=0.0001, kernel=linear, score=0.588, total=   0.0s
[CV] C=800, degree=2, epsilon=0.0001, kernel=linear ..................
[CV]  C=800, degree=2, epsilon=0.0001, kernel=linear, score=0.555, total=   0.0s
[CV] C=800, degree=2, epsilon=0.0001, kernel=poly ....................
[CV]  C=800, degree=2, epsilon=0.0001, kernel=poly, score=0.569, total=   0.0s
[CV] C=800, degree=2, epsilon=0.0001, kernel=poly ....................
[CV]  C=800, degree=2, epsilon=0.0001, kernel=poly, score=0.378, total=   0.0s
[CV] C=800, degree=2, epsilon=0.0001, kernel=poly ....................
[CV]  C=800, degree=2, epsilon=0.0001, kernel=poly, score=0.345, total=   0.0s
[CV] C=800, d

[CV]  C=900, degree=2, epsilon=1e-06, kernel=poly, score=0.295, total=   0.0s


svr train score 0.683, svr test score: 0.734
SVR()

*** SVR Best Parameters ***
C: 700
cache_size: 200
coef0: 0.0
degree: 2
epsilon: 0.0001
gamma: scale
kernel: linear
max_iter: -1
shrinking: True
tol: 0.001
verbose: False



[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:    3.5s finished


#### Decision Tree parameter grid

In [28]:
param_grid_dt = dict(criterion = ['mse'], min_samples_leaf=[8,9,10,11,12,13], 
                  max_depth = [3,4,5,6,7,8],
                  min_impurity_decrease = [0, 0.01, 0.02, 1, 2],
                random_state =[0])


In [29]:
dt = GridSearchCV(DecisionTreeRegressor(), param_grid=param_grid_dt,
                  cv=5, verbose=3)

In [30]:
#test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0 )

#standard scaler (fit transform on train, fit only on test)
sc = StandardScaler()
X_train = sc.fit_transform(X_train.astype(np.float))
X_test= sc.transform(X_test.astype(np.float))

In [31]:
dt = dt.fit(X_train,y_train.values.ravel())

Fitting 5 folds for each of 180 candidates, totalling 900 fits
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0, score=0.819, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0, score=0.806, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0, score=0.843, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_leaf=8, random_state=0, score=0.804, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0, min_samples_lea

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=9, random_state=0, score=0.804, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=9, random_state=0, score=0.787, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=10, random_state=0, score=0.819, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=10, random_state=0, score=0.806, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.01, min_samples_leaf=10, random_state=0, score=0.

[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=12, random_state=0, score=0.806, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=12, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=12, random_state=0, score=0.843, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=12, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=12, random_state=0, score=0.804, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=12, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=12, random_state=0, score=0.787, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=0.02, min_samples_leaf=13, random_state=0, score

[CV]  criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=8, random_state=0, score=0.843, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=8, random_state=0, score=0.804, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=8, random_state=0, score=0.787, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=9, random_state=0, score=0.819, total=   0.0s
[CV] criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=3, min_impurity_decrease=2, min_samples_leaf=9, random_state=0, score=0.806, total=   0.0s
[CV] criterion

[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0, score=0.818, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0, score=0.820, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0, score=0.845, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0, score=0.815, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0, min_samples_leaf=11, random_state=0, score=0.796, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=12, random_state=0, score=0.796, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0, score=0.819, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0, score=0.820, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0, score=0.841, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=0.01, min_samples_leaf=13, random_state=0, score

[CV]  criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0, score=0.817, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0, score=0.819, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0, score=0.846, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0, score=0.812, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=1, min_samples_leaf=9, random_state=0, score=0.796, total=   0.0s
[CV] criterion

[CV]  criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0, score=0.818, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0, score=0.820, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0, score=0.845, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0, score=0.815, total=   0.0s
[CV] criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=4, min_impurity_decrease=2, min_samples_leaf=11, random_state=0, score=0.796, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.827, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.820, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.848, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.814, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.787, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=8, random_state=0, score=0.821, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=8, random_state=0, score=0.788, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0, score=0.827, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0, score=0.807, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0, score=0.853, t

[CV]  criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.852, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.816, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.787, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=11, random_state=0, score=0.826, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=1, min_samples_leaf=11, random_state=0, score=0.819, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0, score=0.826, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0, score=0.820, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0, score=0.851, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0, score=0.816, total=   0.0s
[CV] criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0 
[CV]  criterion=mse, max_depth=5, min_impurity_decrease=2, min_samples_leaf=12, random_state=0, score=0.787, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.788, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.812, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.791, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.851, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.812, tot

[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0, score=0.795, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0, score=0.854, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0, score=0.814, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=9, random_state=0, score=0.788, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=0.02, min_samples_leaf=10, random_state=0, score=0.818,

[CV]  criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.788, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0, score=0.820, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0, score=0.809, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0, score=0.849, total=   0.0s
[CV] criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=6, min_impurity_decrease=1, min_samples_leaf=11, random_state=0, score=0.807, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=7, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.782, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.799, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.777, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.830, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=0.01, min_samples_leaf=8, random_state=0, score=0.802, tot

[CV]  criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.793, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.833, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.803, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.782, total=   0.0s
[CV] criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=11, random_state=0 
[CV]  criterion=mse, max_depth=7, min_impurity_decrease=1, min_samples_leaf=11, random_state=0, score=0.811, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.819, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.808, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.843, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.798, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=0, min_samples_leaf=13, random_state=0, score=0.780, total=   0.0s
[CV] 

[CV]  criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=9, random_state=0, score=0.802, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=9, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=9, random_state=0, score=0.781, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.809, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.789, total=   0.0s
[CV] criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=10, random_state=0 
[CV]  criterion=mse, max_depth=8, min_impurity_decrease=1, min_samples_leaf=10, random_state=0, score=0.833, total=   0.0s
[CV] cri

[Parallel(n_jobs=1)]: Done 900 out of 900 | elapsed:    2.4s finished


In [32]:
#print score

print('\n\ndt train score %.3f, dr test score: %.3f' % (
dt.score(X_train,y_train),
dt.score(X_test, y_test)))
print()
print(dt.best_estimator_.get_params())
print()
print_best_params(dt)



dt train score 0.856, dr test score: 0.880

{'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': 5, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0, 'min_impurity_split': None, 'min_samples_leaf': 12, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': 'deprecated', 'random_state': 0, 'splitter': 'best'}

DecisionTreeRegressor()

*** DecisionTreeRegressor Best Parameters ***
ccp_alpha: 0.0
criterion: mse
max_depth: 5
max_features: None
max_leaf_nodes: None
min_impurity_decrease: 0
min_impurity_split: None
min_samples_leaf: 12
min_samples_split: 2
min_weight_fraction_leaf: 0.0
presort: deprecated
random_state: 0
splitter: best



#### Random Forest parameter grid

In [33]:
param_grid_rf = dict(n_estimators=[90,100],
                     max_depth=[3,4],
                     min_samples_split= [2,3,4],
                     min_samples_leaf= [3,5,7],
                     bootstrap=[True, False],
                     oob_score=[False])

In [34]:
rf = GridSearchCV(RandomForestRegressor(), param_grid=param_grid_rf,
                  cv=5, verbose=3)

In [35]:
#test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0 )

#standard scaler (fit transform on train, fit only on test)
sc = StandardScaler()
X_train = sc.fit_transform(X_train.astype(np.float))
X_test= sc.transform(X_test.astype(np.float))

In [36]:
rf = rf.fit(X_train,y_train.values.ravel())

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False, score=0.836, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV]  bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False, score=0.810, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s


[CV]  bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False, score=0.857, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False, score=0.827, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False, score=0.794, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False, score=0.836, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstr

[CV]  bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False, score=0.812, total=   0.2s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False, score=0.856, total=   0.2s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False, score=0.827, total=   0.2s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False, score=0.795, total=   0.2s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=5, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  boo

[CV]  bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False, score=0.838, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False, score=0.809, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False, score=0.856, total=   0.1s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False, score=0.827, total=   0.2s
[CV] bootstrap=True, max_depth=3, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=

[CV]  bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False, score=0.837, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False, score=0.819, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False, score=0.854, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False, score=0.834, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=3, min_samples_split=3, n_estimators=100, oob_score=False 
[CV]  bo

[CV]  bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=3, n_estimators=100, oob_score=False, score=0.799, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=4, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=4, n_estimators=90, oob_score=False, score=0.840, total=   0.1s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=4, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=4, n_estimators=90, oob_score=False, score=0.821, total=   0.1s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=4, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=4, n_estimators=90, oob_score=False, score=0.855, total=   0.1s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=5, min_samples_split=4, n_estimators=90, oob_score=False 
[CV]  bootstrap

[CV]  bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=90, oob_score=False, score=0.837, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=90, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=90, oob_score=False, score=0.799, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=100, oob_score=False, score=0.838, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=100, oob_score=False 
[CV]  bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=100, oob_score=False, score=0.818, total=   0.2s
[CV] bootstrap=True, max_depth=4, min_samples_leaf=7, min_samples_split=4, n_estimators=100, oob_score=False 
[CV]  boots

[CV]  bootstrap=False, max_depth=3, min_samples_leaf=3, min_samples_split=4, n_estimators=100, oob_score=False, score=0.843, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=3, min_samples_split=4, n_estimators=100, oob_score=False 
[CV]  bootstrap=False, max_depth=3, min_samples_leaf=3, min_samples_split=4, n_estimators=100, oob_score=False, score=0.804, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=3, min_samples_split=4, n_estimators=100, oob_score=False 
[CV]  bootstrap=False, max_depth=3, min_samples_leaf=3, min_samples_split=4, n_estimators=100, oob_score=False, score=0.787, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=90, oob_score=False, score=0.819, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=5, min_samples_split=2, n_estimators=90, oob_score=False 
[CV

[CV]  bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=90, oob_score=False, score=0.806, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=90, oob_score=False, score=0.843, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=90, oob_score=False, score=0.804, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=90, oob_score=False, score=0.787, total=   0.1s
[CV] bootstrap=False, max_depth=3, min_samples_leaf=7, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  b

[CV]  bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=90, oob_score=False, score=0.795, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False, score=0.814, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False, score=0.818, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False, score=0.844, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=3, min_samples_split=2, n_estimators=100, oob_score=False 
[

[CV]  bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False, score=0.812, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=2, n_estimators=100, oob_score=False, score=0.795, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=3, n_estimators=90, oob_score=False, score=0.817, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=3, n_estimators=90, oob_score=False, score=0.819, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=5, min_samples_split=3, n_estimators=90, oob_score=False 
[CV] 

[CV]  bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False, score=0.846, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False, score=0.812, total=   0.3s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=90, oob_score=False, score=0.796, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=100, oob_score=False 
[CV]  bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=100, oob_score=False, score=0.817, total=   0.1s
[CV] bootstrap=False, max_depth=4, min_samples_leaf=7, min_samples_split=3, n_estimators=100, oob_score=False 
[CV] 

[Parallel(n_jobs=1)]: Done 360 out of 360 | elapsed:   55.4s finished


In [37]:
#print score

print('\n\nrf train score %.3f, dr test score: %.3f' % (
rf.score(X_train,y_train),
rf.score(X_test, y_test)))
print()
print(rf.best_estimator_.get_params())
print()
print_best_params(rf)



rf train score 0.855, dr test score: 0.893

{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': 4, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 7, 'min_samples_split': 4, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 90, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}

RandomForestRegressor()

*** RandomForestRegressor Best Parameters ***
bootstrap: True
ccp_alpha: 0.0
criterion: mse
max_depth: 4
max_features: auto
max_leaf_nodes: None
max_samples: None
min_impurity_decrease: 0.0
min_impurity_split: None
min_samples_leaf: 7
min_samples_split: 4
min_weight_fraction_leaf: 0.0
n_estimators: 90
n_jobs: None
oob_score: False
random_state: None
verbose: 0
warm_start: False



### Conclusion

#### In Sample, Out of Sample Scores

Linear Regression: 0.728,0.786

Polynomial Regression: 0.836, 0.847

SVR: 0.598, 0.628

Decision Tree: 0.999, 0.717

Random Forest: 0.973, 0.859

SVR with GridSearch: 0.683, 0.734

Decision Tree with Grid Search: 0.856, 0.880

Random Forest with Grid Search: 0.854, 0.894