In [1]:
#importing the Libraies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Reading the Dataset
dataset = pd.read_csv('insurance_pre.csv')

In [3]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
# Convert nominal data into numerical values
dataset=pd.get_dummies(dataset,drop_first=True)

In [5]:
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [6]:
indep=dataset[['age', 'bmi', 'children','sex_male', 'smoker_yes']]
dep=dataset['charges']

In [7]:
#split into training set and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(indep, dep, test_size = 1/3, random_state = 0)


In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
param_grid = {'criterion':['squared_error','friedman_mse','absolute_error','poisson'],
              'max_features': ['sqrt','log2',None],
              'splitter':['best','random']} 

# GridSearch for Decision Tree Regressor
grid = GridSearchCV(DecisionTreeRegressor(), param_grid, refit = True, verbose = 3,n_jobs=-1) 
   
# fitting the model for grid search 
grid.fit(X_train, y_train) 

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [12]:
# print best parameter after tuning 
#print(grid.best_params_) 
re=grid.cv_results_
#print(re)
grid_predictions = grid.predict(X_test) 
   
# print classification report 
from sklearn.metrics import r2_score
r_score=r2_score(y_test,grid_predictions)

print("The R_score value for best parameter {}:".format(grid.best_params_),r_score)



The R_score value for best parameter {'criterion': 'absolute_error', 'max_features': 'log2', 'splitter': 'random'}: 0.7257958494594806


In [13]:
table=pd.DataFrame.from_dict(re)

In [14]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003668,0.000291,0.001657,0.000295,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.677063,0.52304,0.331426,0.562209,0.423083,0.503364,0.118346,24
1,0.002903,0.000377,0.001469,0.000141,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.568033,0.583665,0.577545,0.483797,0.641327,0.570874,0.050522,18
2,0.003194,0.000544,0.001467,0.00022,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.674399,0.323608,0.676411,0.626156,0.629696,0.586054,0.132936,14
3,0.002886,0.000402,0.001529,0.00016,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.490495,0.615733,0.645803,0.733356,0.630647,0.623207,0.077932,9
4,0.00381,0.000387,0.001408,0.000183,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.693241,0.475168,0.750248,0.633401,0.665231,0.643458,0.092498,6
5,0.004011,0.001368,0.002058,0.001073,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.590896,0.693723,0.727217,0.487898,0.675336,0.635014,0.086213,8
6,0.003292,0.000314,0.002535,0.00197,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.675272,0.526953,0.662221,0.575362,0.480208,0.584003,0.075566,15
7,0.003151,0.000374,0.001988,0.000561,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.522005,0.690701,0.511904,0.390562,0.65896,0.554826,0.108819,20
8,0.003535,0.001226,0.001345,0.0002,friedman_mse,log2,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.619461,0.646818,0.71757,0.61063,0.68401,0.655698,0.040132,2
9,0.002764,0.000455,0.00137,0.000402,friedman_mse,log2,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.541962,0.567487,0.676,0.557122,0.603529,0.58922,0.047899,13


In [15]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age: 45
BMI: 23.5
Children: 2
Sex Male 0 or 1: 1
Smoker Yes 0 or 1: 0


In [16]:
Future_Prediction=grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])# change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[19496.71917]


In [17]:
table=pd.DataFrame.from_dict(re)
best_r = table['mean_test_score'].max()
best_r

0.6586718073641936

In [18]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003668,0.000291,0.001657,0.000295,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.677063,0.52304,0.331426,0.562209,0.423083,0.503364,0.118346,24
1,0.002903,0.000377,0.001469,0.000141,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.568033,0.583665,0.577545,0.483797,0.641327,0.570874,0.050522,18
2,0.003194,0.000544,0.001467,0.00022,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.674399,0.323608,0.676411,0.626156,0.629696,0.586054,0.132936,14
3,0.002886,0.000402,0.001529,0.00016,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.490495,0.615733,0.645803,0.733356,0.630647,0.623207,0.077932,9
4,0.00381,0.000387,0.001408,0.000183,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.693241,0.475168,0.750248,0.633401,0.665231,0.643458,0.092498,6
5,0.004011,0.001368,0.002058,0.001073,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.590896,0.693723,0.727217,0.487898,0.675336,0.635014,0.086213,8
6,0.003292,0.000314,0.002535,0.00197,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.675272,0.526953,0.662221,0.575362,0.480208,0.584003,0.075566,15
7,0.003151,0.000374,0.001988,0.000561,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.522005,0.690701,0.511904,0.390562,0.65896,0.554826,0.108819,20
8,0.003535,0.001226,0.001345,0.0002,friedman_mse,log2,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.619461,0.646818,0.71757,0.61063,0.68401,0.655698,0.040132,2
9,0.002764,0.000455,0.00137,0.000402,friedman_mse,log2,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.541962,0.567487,0.676,0.557122,0.603529,0.58922,0.047899,13


In [19]:
best_r = table['mean_test_score'].max()
best_r

0.6586718073641936