In [None]:
from sklearn.ensemble import VotingClassifier,RandomForestClassifier,VotingRegressor, RandomForestRegressor,StackingRegressor
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder ,MinMaxScaler,StandardScaler,RobustScaler
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error,r2_score,accuracy_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.impute import SimpleImputer

In [None]:
# Location of the insurance CSV read into `df` below.
INSURANCE_CSV_URL = "https://raw.githubusercontent.com/mujahidashraf/data/refs/heads/main/insurance.csv"

# Load the raw table; later cells select feature columns and the
# 'expenses' target from this frame.
df = pd.read_csv(INSURANCE_CSV_URL)


In [None]:
# Encode the 'sex', 'smoker' and 'region' columns as integer codes, writing
# the result back into `df`.
#
# One LabelEncoder is kept per column. Refitting a single shared encoder on
# each column in turn would leave only the last column's mapping available,
# which makes it impossible to translate 'sex' / 'smoker' codes back to their
# original labels (via `encoders[col].classes_` or `.inverse_transform`).
# NOTE(review): assumes these columns hold raw (un-encoded) labels when this
# cell runs; re-running it on an already-encoded `df` would fit the encoders
# on the integer codes instead.
CATEGORICAL_COLUMNS = ['sex', 'smoker', 'region']
encoders = {}
for column in CATEGORICAL_COLUMNS:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward-compatible alias: code that refers to `le` still gets the encoder
# fitted on 'region', the last column encoded.
le = encoders['region']

# Feature matrix and regression target used by the following cells.
X = df[['age','sex','bmi','children','smoker','region']]
y = df['expenses']

In [None]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
def _scaled(step_name, estimator):
    """Build a two-step Pipeline: a StandardScaler followed by `estimator`.

    Parameters
    ----------
    step_name : str
        Name given to the estimator step inside the pipeline.
    estimator : estimator object
        The regressor placed after the scaler.

    Returns
    -------
    Pipeline
        An unfitted pipeline with steps ``('scaler', step_name)``.
    """
    return Pipeline([
        ('scaler', StandardScaler()),
        (step_name, estimator),
    ])


# --- Linear models -----------------------------------------------------------
pipeline_lr = _scaled('lr', LinearRegression())
pipeline_ridge = _scaled('ridge', Ridge())
pipeline_lasso = _scaled('lasso', Lasso())

# --- Nearest neighbours ------------------------------------------------------
pipeline_knn = _scaled('knn', KNeighborsRegressor(n_neighbors=5))

# --- Tree-based models -------------------------------------------------------
tree_settings = dict(
    ccp_alpha=0.0,
    criterion='squared_error',
    max_depth=5,
    max_features=None,
    min_samples_leaf=4,
    min_samples_split=2,
    random_state=42,
    splitter='best',
)
pipeline_tree = _scaled('tree', DecisionTreeRegressor(**tree_settings))
pipeline_rf = _scaled(
    'rf',
    RandomForestRegressor(n_estimators=100, random_state=42),
)

# --- Support vector regression -----------------------------------------------
svr_settings = dict(C=10, degree=3, epsilon=0.2, gamma=1, kernel='poly')
pipeline_svr = _scaled('svr', SVR(**svr_settings))

# Base learners, in the order they are handed to the stacking ensemble.
base_learners = [
    ('lr', pipeline_lr),
    ('ridge', pipeline_ridge),
    ('lasso', pipeline_lasso),
    ('knn', pipeline_knn),
    ('tree', pipeline_tree),
    ('rf', pipeline_rf),
    ('svr', pipeline_svr),
]

# The same `pipeline_tree` object is passed both as a base learner and as the
# final estimator.
# NOTE(review): scikit-learn is expected to clone estimators when fitting, so
# the two roles should not share fitted state - confirm against the
# StackingRegressor documentation.
Stacking_reg = StackingRegressor(
    estimators=base_learners,
    final_estimator=pipeline_tree,
    cv=5,
)

# Train the whole ensemble on the training split.
Stacking_reg.fit(X_train, y_train)

In [None]:
# Print the test-set score of every fitted base learner inside the stack,
# one "<name> = <score>" line per estimator.
for name, reg in Stacking_reg.named_estimators_.items():
    holdout_score = reg.score(X_test, y_test)
    print(name, "=", holdout_score)

lr = 0.7694626233326285
ridge = 0.7694438657226381
lasso = 0.7694666087817765
knn = 0.8276139187631248
tree = 0.8594369540424764
rf = 0.854386281612938
svr = 0.8513880078920127


In [None]:
# Test-set score of the full stacked ensemble; left as the last expression so
# the notebook displays the value.
Stacking_reg.score(X=X_test, y=y_test)

0.8571506172763184

In [None]:
# Predict on the held-out split and summarise the errors of the stacked model.
y_pred = Stacking_reg.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above
r2 = r2_score(y_test, y_pred)

# One "<LABEL>: <value>" line per metric, followed by a 100-character rule.
for label, value in (("MAE", mae), ("MSE", mse), ("RMSE", rmse), ("R2", r2)):
    print(f"{label}: {value}")
print("_" * 100)

MAE: 2630.478557146724
MSE: 20945192.034433383
RMSE: 4576.591748717968
R2: 0.8571506172763184
____________________________________________________________________________________________________
