In [1]:
# Importing the Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Reading the Dataset
dataset = pd.read_csv('insurance_pre.csv')

In [3]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
# Encoding categorical variables
dataset = pd.get_dummies(dataset, drop_first=True)

In [5]:
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [6]:
# Separate independent and dependent variables
indep = dataset[['age', 'bmi', 'children','sex_male', 'smoker_yes']]
dep = dataset['charges']

In [7]:
# Importing necessary libraries
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

# Define parameter grid
param_grid = {'kernel': ['rbf', 'poly', 'sigmoid'],
              'C': [10, 100, 1000],
              'gamma': ['auto', 'scale']}

# Create GridSearchCV object
grid = GridSearchCV(SVR(), param_grid, refit=True, verbose=3, n_jobs=-1, cv=3)

# Fit the model
grid.fit(indep, dep)

# Print best parameters
print("Best parameters:", grid.best_params_)


Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 2/3] END .....C=10, gamma=auto, kernel=poly;, score=0.835 total time=33.5min
[CV 1/3] END C=1000, gamma=auto, kernel=sigmoid;, score=-0.124 total time=   0.1s
[CV 2/3] END C=1000, gamma=auto, kernel=sigmoid;, score=-0.073 total time=   0.1s
[CV 3/3] END C=1000, gamma=auto, kernel=sigmoid;, score=-0.116 total time=   0.1s
[CV 1/3] END ..C=1000, gamma=scale, kernel=rbf;, score=-0.142 total time=   0.2s
[CV 2/3] END ..C=1000, gamma=scale, kernel=rbf;, score=-0.098 total time=   0.2s
[CV 3/3] END ..C=1000, gamma=scale, kernel=rbf;, score=-0.203 total time=   0.2s
[CV 1/3] END .C=1000, gamma=scale, kernel=poly;, score=-0.075 total time=   0.1s
[CV 2/3] END .C=1000, gamma=scale, kernel=poly;, score=-0.057 total time=   0.1s
[CV 3/3] END .C=1000, gamma=scale, kernel=poly;, score=-0.149 total time=   0.1s
[CV 1/3] END C=1000, gamma=scale, kernel=sigmoid;, score=-1.636 total time=   0.1s
[CV 2/3] END C=1000, gamma=scale, kernel=si

In [8]:
# print best parameter after tuning 
re=grid.cv_results_

print("The R_score value for best parameter {}:".format(grid.best_params_))

The R_score value for best parameter {'C': 10, 'gamma': 'auto', 'kernel': 'poly'}:


In [9]:
table=pd.DataFrame.from_dict(re)

In [10]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.144658,0.004599,0.130463,0.004199,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",-0.121859,-0.071812,-0.115978,-0.103216,0.022336,6
1,2077.723882,585.111359,0.046734,0.009342,10,auto,poly,"{'C': 10, 'gamma': 'auto', 'kernel': 'poly'}",0.841561,0.835473,0.792087,0.82304,0.022028,1
2,0.091982,0.000782,0.041814,0.005964,10,auto,sigmoid,"{'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'}",-0.124306,-0.073333,-0.116351,-0.104664,0.022391,8
3,0.122326,0.00453,0.13077,0.001675,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",-0.120647,-0.062407,-0.126932,-0.103329,0.02905,7
4,0.097044,0.00181,0.033605,0.004935,10,scale,poly,"{'C': 10, 'gamma': 'scale', 'kernel': 'poly'}",-0.117169,-0.084321,-0.180189,-0.127226,0.039778,12
5,0.109594,0.005192,0.049066,0.006202,10,scale,sigmoid,"{'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}",-0.126045,-0.075274,-0.117573,-0.106297,0.022208,11
6,0.119939,0.006663,0.125036,0.00675,100,auto,rbf,"{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}",-0.11301,-0.064058,-0.111684,-0.096251,0.02277,5
7,5419.485103,2395.33094,0.048858,0.009321,100,auto,poly,"{'C': 100, 'gamma': 'auto', 'kernel': 'poly'}",0.80216,0.732073,0.685501,0.739911,0.047947,2
8,0.078878,0.003652,0.035267,0.005743,100,auto,sigmoid,"{'C': 100, 'gamma': 'auto', 'kernel': 'sigmoid'}",-0.124306,-0.073333,-0.116351,-0.104664,0.022391,8
9,0.102668,0.011942,0.137624,0.018626,100,scale,rbf,"{'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}",-0.147772,-0.094747,-0.201098,-0.147873,0.043418,16


In [11]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age:34
BMI:22
Children:2
Sex Male 0 or 1:0
Smoker Yes 0 or 1:0


In [12]:
Future_Prediction=grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])# change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[5640.38796943]


