In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, StandardScaler, PolynomialFeatures
from sklearn.metrics import accuracy_score, r2_score, mean_absolute_error, mean_squared_error
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import VotingRegressor

In [4]:
# Load the insurance dataset.
# The path is a raw string so the Windows backslashes are taken literally;
# in a normal string "\s", "\i" and "\d" are invalid escape sequences, which
# Python 3.12+ reports with a SyntaxWarning.
df = pd.read_csv(r"D:\school sht\itd105\itd105-webapp\itd105-webapp\dataset\insurance.csv")

# Work on an independent copy so the frame as read from disk (`df`) is not
# affected by the preprocessing applied to `dataframe` in later cells.
dataframe = df.copy()
dataframe.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
dataframe.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [4]:
# Number of missing values in each column.
dataframe.isna().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [5]:
# Encode the two binary categorical columns as integers.
# The encoded Series is assigned back to the column: calling
# `replace(..., inplace=True)` on a column selection is chained assignment,
# which pandas warns about and which never updates `dataframe` once
# Copy-on-Write is enabled.
# `replace` leaves already-encoded values alone, so the cell can be re-run;
# the explicit cast keeps the dtype numeric regardless of how the installed
# pandas version infers the result of `replace`.
dataframe["sex"] = dataframe["sex"].replace({"male": 0, "female": 1}).astype("int64")
dataframe["smoker"] = dataframe["smoker"].replace({"yes": 1, "no": 0}).astype("int64")
dataframe.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,1,27.9,0,1,southwest,16884.924
1,18,0,33.77,1,0,southeast,1725.5523
2,28,0,33.0,3,0,southeast,4449.462
3,33,0,22.705,0,0,northwest,21984.47061
4,32,0,28.88,0,0,northwest,3866.8552


In [7]:
# Remove the `region` column from the working frame.
# `drop(..., errors="ignore")` keeps this cell re-runnable: `pop` raises
# KeyError on a second run because the column is already gone, and its
# return value (the removed column) was never used here.
dataframe = dataframe.drop(columns="region", errors="ignore")
dataframe.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,1,27.9,0,1,16884.924
1,18,0,33.77,1,0,1725.5523
2,28,0,33.0,3,0,4449.462
3,33,0,22.705,0,0,21984.47061
4,32,0,28.88,0,0,3866.8552


In [8]:
# Separate the regression target from the feature columns.
y = dataframe['charges']
X = dataframe.drop(columns='charges')


(1070, 268)

In [12]:
def model_report(y_test, y_pred):
    """Print regression metrics for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        True target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_test``.

    Notes
    -----
    The line labelled "Accuracy" is the R2 score expressed as a percentage
    (regression has no classification accuracy); the label is kept so the
    printed format does not change.

    Every metric is computed from the two arguments. The previous version
    called ``model.score(X_test, y_test)`` on notebook-level variables, so the
    first line could describe a different estimator than the one that
    produced ``y_pred`` whenever ``model`` had been rebound in between.
    """
    r2 = r2_score(y_test, y_pred)
    print(f"Accuracy: {r2 * 100:.2f}%")
    print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
    print(f"MSE: {mean_squared_error(y_test, y_pred)}")
    print(f"R2: {r2}")
    
# Predict on the held-out features and print the regression metrics.
# NOTE(review): `model`, `X_test` and `y_test` are not assigned in any cell
# shown above this one; the only visible assignments are the loop variable of
# the cross-validation cell and the train/test split further down.
# Presumably they come from cells not shown here -- confirm this cell still
# works after Restart Kernel -> Run All.
y_pred = model.predict(X_test)

model_report(y_test, y_pred)

Accuracy: 78.11%
MAE: 4213.484797807139
MSE: 33979257.05080822
R2: 0.7811302113434095


In [15]:
# Baseline regressors to compare, stored as (label, estimator) pairs.
lr = LinearRegression()
svm = SVR()
# MLPRegressor initialises its weights randomly; a fixed seed makes the
# comparison repeatable between runs.
ann = MLPRegressor(random_state=42)

models = [('LR', lr), ('SVM', svm), ('MLP', ann)]

In [16]:
# Voting ensemble built from fresh instances of the three base regressors.
model0 = LinearRegression()
model2 = SVR()
# Seeded so the ensemble's result is repeatable between runs.
model3 = MLPRegressor(random_state=42)

estimators = [('lr', model0), ('svm', model2), ('ann', model3)]

vot = VotingRegressor(estimators)
models.append(('VOT', vot))

In [17]:
from sklearn.ensemble import BaggingRegressor

# The base estimator is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so the positional form works on both sides of the rename.
# `random_state` fixes the bootstrap samples so results are repeatable.
baglr = BaggingRegressor(LinearRegression(), n_estimators=10, random_state=42)
models.append(('BAGLR', baglr))

bagsvm = BaggingRegressor(SVR(), n_estimators=10, random_state=42)
models.append(('BAGSVM', bagsvm))

bagann = BaggingRegressor(MLPRegressor(), n_estimators=10, random_state=42)
models.append(('BAGMLP', bagann))

In [18]:
from sklearn.ensemble import AdaBoostRegressor

# The base estimator is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so the positional form works on both sides of the rename.
# `random_state` makes the boosting rounds repeatable.
adalr = AdaBoostRegressor(LinearRegression(), n_estimators=10, random_state=42)
models.append(('ADALR', adalr))

adasvm = AdaBoostRegressor(SVR(), n_estimators=10, random_state=42)
models.append(('ADASVM', adasvm))

In [21]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

results = []
names = []
msgs = []
scoring = 'neg_mean_squared_error'

# One 10-fold splitter shared by every candidate; it does not depend on the
# estimator, so it is built once instead of once per iteration.
kfold = KFold(n_splits=10)

# The loop variable is deliberately not called `model`: that name is read as a
# notebook-level variable by other cells and must not be rebound to whichever
# estimator happens to be last in `models`.
for name, estimator in models:
    # Scores are negated MSE values, so numbers closer to zero are better.
    cv_results = cross_val_score(estimator, X, y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s:\t %f (%f)" % (name, cv_results.mean(), cv_results.std())
    msgs.append(msg)









In [22]:
# Print one "name: mean (std)" summary line per cross-validated model.
for summary_line in msgs:
    print(summary_line)

LR:	 -37044419.407231 (5649232.839859)
SVM:	 -161633182.938609 (22157833.393740)
MLP:	 -138751793.819452 (19122302.655004)
VOT:	 -85636724.330560 (12314835.773729)
BAGLR:	 -37012431.839204 (5639042.718596)
BAGSVM:	 -161829534.365179 (22426909.774559)
BAGMLP:	 -137077429.274889 (18039400.229223)
ADALR:	 -39894260.635040 (5484073.239479)
ADASVM:	 -149562006.381382 (20463749.527038)


In [23]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit the linear regression on the training portion (`fit` returns the estimator).
lr_model = LinearRegression().fit(X_train, y_train)

# For a regressor `score` is the R2 coefficient; it is printed as a percentage.
result = lr_model.score(X_test, y_test)
print("Accuracy : %.3f%%" % (result * 100.0))

Accuracy : 78.113%


In [25]:
import joblib

# Persist the fitted linear-regression model to disk with joblib.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only works on a machine with this exact directory layout -- consider deriving
# it from a configurable base directory.
filename = 'D:\\school sht\\itd105\\itd105-webapp\\itd105-webapp\\ml model\\regression_model.aiml'
# `joblib.dump` returns the list of file names it wrote, which is what the
# cell displays as its output.
joblib.dump(lr_model, filename)

['D:\\school sht\\itd105\\itd105-webapp\\itd105-webapp\\ml model\\regression_model.aiml']