In [1]:
import pandas as pd

In [2]:
dataset = pd.read_csv('insurance_pre.csv')

In [3]:
dataset = pd.get_dummies(dataset, drop_first = True, dtype = int)

In [4]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [5]:
independent = dataset[['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes']]

In [6]:
dependent = dataset[['charges']]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
x_train, x_test, y_train, y_test = train_test_split(independent, dependent, test_size = 0.3)

In [9]:
param_grids = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter': ['best', 'random'],
    'max_depth': [1,2,3,4,5],
    'min_impurity_decrease': [0.0, 0.1, 0.2, 0.5]
}

In [10]:
from sklearn.tree import DecisionTreeRegressor

In [11]:
model = DecisionTreeRegressor()

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
gsv = GridSearchCV(model, param_grids, refit = True, verbose = 3, n_jobs = -1)

In [14]:
gsv.fit(x_train, y_train)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


In [15]:
y_predict = gsv.predict(x_test)

In [16]:
from sklearn.metrics import r2_score

In [17]:
result = r2_score(y_test, y_predict)
print (gsv.best_params_ , result)

{'criterion': 'friedman_mse', 'max_depth': 5, 'min_impurity_decrease': 0.2, 'splitter': 'best'} 0.9982970688265344


In [18]:
#To display in table format

In [19]:
re = gsv.cv_results_

In [20]:
pd.DataFrame.from_dict(re)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_min_impurity_decrease,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.023978,0.022503,0.007161,0.001015,squared_error,1,0.0,best,"{'criterion': 'squared_error', 'max_depth': 1,...",0.758507,0.711646,0.744230,0.771314,0.759480,0.749036,0.020573,129
1,0.009607,0.001281,0.010404,0.005312,squared_error,1,0.0,random,"{'criterion': 'squared_error', 'max_depth': 1,...",0.720062,0.599442,0.744090,0.665600,0.769896,0.699818,0.060872,145
2,0.011212,0.002436,0.006651,0.001043,squared_error,1,0.1,best,"{'criterion': 'squared_error', 'max_depth': 1,...",0.758507,0.711646,0.744230,0.771314,0.759480,0.749036,0.020573,135
3,0.008943,0.001577,0.007689,0.001363,squared_error,1,0.1,random,"{'criterion': 'squared_error', 'max_depth': 1,...",0.582273,0.654031,0.683546,0.665600,0.731074,0.663305,0.048290,149
4,0.010097,0.001941,0.008198,0.002592,squared_error,1,0.2,best,"{'criterion': 'squared_error', 'max_depth': 1,...",0.758507,0.711646,0.744230,0.771314,0.759480,0.749036,0.020573,129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,0.018711,0.011926,0.014452,0.008555,poisson,5,0.1,random,"{'criterion': 'poisson', 'max_depth': 5, 'min_...",0.987761,0.985009,0.981778,0.990592,0.985930,0.986214,0.002925,33
156,0.026181,0.014637,0.010917,0.002704,poisson,5,0.2,best,"{'criterion': 'poisson', 'max_depth': 5, 'min_...",0.997889,0.996270,0.998456,0.997948,0.997940,0.997701,0.000744,9
157,0.015628,0.006858,0.012488,0.005394,poisson,5,0.2,random,"{'criterion': 'poisson', 'max_depth': 5, 'min_...",0.991384,0.961754,0.967432,0.976429,0.980643,0.975528,0.010331,51
158,0.012336,0.003297,0.008507,0.003401,poisson,5,0.5,best,"{'criterion': 'poisson', 'max_depth': 5, 'min_...",0.997788,0.996114,0.997960,0.997358,0.997814,0.997407,0.000677,16


In [21]:
#To Find the best params

In [22]:
gsv.best_params_

{'criterion': 'friedman_mse',
 'max_depth': 5,
 'min_impurity_decrease': 0.2,
 'splitter': 'best'}

In [23]:
gsv.best_estimator_