In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
dataset = pd.read_csv("insurance_pre.csv")

In [3]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
dataset = pd.get_dummies(dataset, drop_first=True).astype(int)

In [5]:
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27,0,16884,0,1
1,18,33,1,1725,1,0
2,28,33,3,4449,1,0
3,33,22,0,21984,1,0
4,32,28,0,3866,1,0
...,...,...,...,...,...,...
1333,50,30,3,10600,1,0
1334,18,31,0,2205,0,0
1335,18,36,0,1629,0,0
1336,21,25,0,2007,0,0


In [6]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [7]:
independent = dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
dependent = dataset[['charges']]

In [8]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(independent,dependent,test_size=0.30,random_state=0)

In [9]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [12]:

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

Y_train = np.ravel(Y_train)
param_grid = { 
    'n_estimators': [50, 100],  
    'learning_rate': [0.01, 0.1],  
    'subsample': [0.5, 1.0],  
    'loss': ['squared_error', 'quantile', 'huber', 'absolute_error'],  
    'max_features': ['sqrt', 'log2', None]
}


grid = GridSearchCV(GradientBoostingRegressor(), param_grid)
grid.fit(X_train, Y_train)




In [13]:
result=grid.cv_results_
y_pre = grid.predict(X_test)
from sklearn.metrics import r2_score
r_score = r2_score(Y_test, y_pre)
print("The R_score value for best parameter {}:".format(grid.best_params_),r_score)

The R_score value for best parameter {'learning_rate': 0.1, 'loss': 'huber', 'max_features': None, 'n_estimators': 50, 'subsample': 0.5}: 0.8913509398179683


In [14]:
table=pd.DataFrame.from_dict(result)

In [15]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_loss,param_max_features,param_n_estimators,param_subsample,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.201622,0.057782,0.006401,0.005987,0.01,squared_error,sqrt,50,0.5,"{'learning_rate': 0.01, 'loss': 'squared_error...",0.474331,0.401637,0.404268,0.420768,0.359308,0.412062,0.037163,74
1,0.097611,0.005988,0.001601,0.003202,0.01,squared_error,sqrt,50,1.0,"{'learning_rate': 0.01, 'loss': 'squared_error...",0.452779,0.386207,0.415644,0.433854,0.395780,0.416853,0.024349,72
2,0.195222,0.006402,0.003201,0.003920,0.01,squared_error,sqrt,100,0.5,"{'learning_rate': 0.01, 'loss': 'squared_error...",0.647128,0.606624,0.641991,0.653945,0.593510,0.628640,0.023997,41
3,0.176018,0.005058,0.004801,0.003920,0.01,squared_error,sqrt,100,1.0,"{'learning_rate': 0.01, 'loss': 'squared_error...",0.657911,0.602556,0.638620,0.650459,0.589865,0.627882,0.026883,42
4,0.096012,0.000003,0.001600,0.003199,0.01,squared_error,log2,50,0.5,"{'learning_rate': 0.01, 'loss': 'squared_error...",0.442952,0.384971,0.428909,0.456309,0.366094,0.415847,0.034558,73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,0.547262,0.008158,0.000000,0.000000,0.1,absolute_error,log2,100,1.0,"{'learning_rate': 0.1, 'loss': 'absolute_error...",0.871871,0.813248,0.801440,0.811893,0.758383,0.811367,0.036250,29
92,0.299234,0.006400,0.001599,0.003199,0.1,absolute_error,,50,0.5,"{'learning_rate': 0.1, 'loss': 'absolute_error...",0.843165,0.776650,0.755444,0.811599,0.715891,0.780550,0.044070,35
93,0.308834,0.003920,0.000000,0.000000,0.1,absolute_error,,50,1.0,"{'learning_rate': 0.1, 'loss': 'absolute_error...",0.832884,0.739322,0.711968,0.783393,0.690134,0.751540,0.051220,36
94,0.576064,0.005060,0.004801,0.003920,0.1,absolute_error,,100,0.5,"{'learning_rate': 0.1, 'loss': 'absolute_error...",0.868998,0.817272,0.801444,0.819932,0.773518,0.816233,0.031120,28


In [16]:
Age_input=float(input("Age:"))
BMI_input=float(input("BMI:"))
Child_input=float(input("Children:"))
Gender_input=int(input("Sex Male 0 or 1:"))
Smoker_input=int(input("Smoker Yes 0 or 1:"))

Age:34
BMI:44
Children:1
Sex Male 0 or 1:1
Smoker Yes 0 or 1:0


In [17]:
F_Prediction=grid.predict([[Age_input,BMI_input,Child_input,Gender_input,Smoker_input]])
print("Future_Prediction={}".format(F_Prediction))

Future_Prediction=[15914.15445059]
