In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score

In [2]:
df = pd.read_csv('insurance.csv')
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [3]:
df.shape

(1338, 7)

In [4]:
df = df.drop(columns=['region'])
df.sample(5)

Unnamed: 0,age,sex,bmi,children,smoker,charges
912,59,female,26.695,3,no,14382.70905
493,61,male,43.4,0,no,12574.049
418,64,male,39.16,1,no,14418.2804
57,18,male,31.68,2,yes,34303.1672
335,64,male,34.5,0,no,13822.803


In [5]:
# Encoding sex, smoker columns

df = pd.get_dummies(df,columns=['sex','smoker'], drop_first=True)
df.sample(5)

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
178,46,28.9,2,8823.279,0,0
1079,63,33.66,3,15161.5344,1,0
316,50,32.205,0,8835.26495,1,0
752,64,37.905,0,14210.53595,1,0
938,18,26.18,2,2304.0022,1,0


In [6]:
X = df[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
y = df['charges']

In [7]:
# Scaling

scaler = StandardScaler()

df = scaler.fit_transform(df)
df

array([[-1.43876426, -0.45332   , -0.90861367,  0.2985838 , -1.0105187 ,
         1.97058663],
       [-1.50996545,  0.5096211 , -0.07876719, -0.95368917,  0.98959079,
        -0.5074631 ],
       [-0.79795355,  0.38330685,  1.58092576, -0.72867467,  0.98959079,
        -0.5074631 ],
       ...,
       [-1.50996545,  1.0148781 , -0.90861367, -0.96159623, -1.0105187 ,
        -0.5074631 ],
       [-1.29636188, -0.79781341, -0.90861367, -0.93036151, -1.0105187 ,
        -0.5074631 ],
       [ 1.55168573, -0.26138796, -0.90861367,  1.31105347, -1.0105187 ,
         1.97058663]])

# Modelling

# 1. Linear kernel

In [8]:
svr1 = SVR(kernel="linear")

In [9]:
x = cross_val_score(svr1,X,y,cv=10)
print(np.round(np.mean(x),2))

-0.14


# 2. rbf kernel

In [10]:
svr2 = SVR(kernel="rbf")

In [11]:
x = cross_val_score(svr2,X,y,cv=10)
print(np.round(np.mean(x),2))

-0.1


# 3. Polynomial kernel

In [12]:
svr3 = SVR(kernel="poly", degree=6)

In [13]:
x = cross_val_score(svr3,X,y,cv=10)
print(np.round(np.mean(x),2))

-0.09
