In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR


# Compare a linear regression and an RBF-kernel SVR on the insurance data,
# then blend their test-set predictions by a plain and a weighted average.
# Each stage prints its test-set mean squared error.

# Load the data and one-hot encode the categorical columns; drop_first=True
# drops the first level of each category.
data = pd.read_csv('insurance.csv')
cat_col = ['gender', 'region', 'smoker']
data_dummies = pd.get_dummies(data, columns=cat_col, drop_first=True)
feature_cols = [
    'age', 'bmi', 'children', 'gender_male',
    'region_northwest', 'region_southeast', 'region_southwest',
    'smoker_yes',
]
X = data_dummies[feature_cols]
# Use a 1-D Series for the target. A one-column DataFrame makes SVR.fit emit
# a DataConversionWarning and makes LinearRegression.predict return (n, 1)
# arrays instead of flat ones.
Y = data['charges']

# train_test_split: hold out 20% of the rows; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.20, random_state=42
)
# Collect the test features, the true target and every prediction in one frame.
test_df = pd.concat([X_test, y_test], axis=1)

# Model 1 Linear regression
regr = LinearRegression()
regr.fit(X_train, y_train)

predicted_lreg = regr.predict(X_test)
test_df['predicted_lreg'] = predicted_lreg
mse = mean_squared_error(y_test, predicted_lreg)
print(mse)

# SVR
# NOTE(review): the SVR is fit on unscaled features and an unscaled target;
# this is presumably why its error is far larger than the linear model's.
# Consider standardising inputs/target before fitting -- confirm intent.
regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train)
predicted_svr = regressor.predict(X_test)
test_df['predicted_svr'] = predicted_svr

mse = mean_squared_error(y_test, predicted_svr)
print(mse)

## Averaging
# Unweighted mean of the two models' predictions, row by row.
test_df['final_pred_avg'] = test_df[['predicted_lreg', 'predicted_svr']].mean(axis=1)
mse = mean_squared_error(y_test, test_df['final_pred_avg'])
print(mse)

## Weighted Averaging
# Blend weights for the two models; they sum to 1.
lreg_weight = 0.6
svr_weight = 0.4
test_df['predicted_lreg_wgtd'] = predicted_lreg * lreg_weight
test_df['predicted_svr_wgtd'] = predicted_svr * svr_weight
test_df['final_pred_avg_wghtd'] = test_df[['predicted_lreg_wgtd', 'predicted_svr_wgtd']].sum(axis=1)
mse = mean_squared_error(y_test, test_df['final_pred_avg_wghtd'])
print(mse)

33596915.851361446
166492259.9106511
67995542.8942302
55987970.28777228


  y = column_or_1d(y, warn=True)
