## Import Dependencies

In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error, r2_score

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv")

In [3]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


## Label Encoding

In [4]:
encoder = LabelEncoder()
df['sex'] = encoder.fit_transform(df['sex'])
df['smoker'] = encoder.fit_transform(df['smoker'])
df['region'] = encoder.fit_transform(df['region'])
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


## Downscaling data

In [5]:
scaler = StandardScaler()
cols = df.columns
df = pd.DataFrame(scaler.fit_transform(df), columns=cols)
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,-1.438764,-1.010519,-0.45332,-0.908614,1.970587,1.343905,0.298584
1,-1.509965,0.989591,0.509621,-0.078767,-0.507463,0.438495,-0.953689
2,-0.797954,0.989591,0.383307,1.580926,-0.507463,0.438495,-0.728675
3,-0.441948,0.989591,-1.305531,-0.908614,-0.507463,-0.466915,0.719843
4,-0.513149,0.989591,-0.292556,-0.908614,-0.507463,-0.466915,-0.776802


In [6]:
x = df.drop('charges', axis=1)
y = df['charges']

## Significant Features


In [7]:
model0 = RandomForestRegressor()
model0.fit(x, y)

feat_importances = pd.Series(model0.feature_importances_, index=x.columns)
significant_features = feat_importances.nlargest(5)
print(significant_features)

smoker      0.620478
bmi         0.210754
age         0.129378
children    0.019977
region      0.013404
dtype: float64


so the smoking is the most significant factor effecting the insurance Premiums

## Identifying and Cliping Extreme Values

In [8]:
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1

df = df[~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)]

In [9]:
from scipy.stats import zscore

df = df.apply(zscore)

In [10]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=12)

## Machine Learning Approach

In [11]:
def model(Model):
    model1 = Model()
    model1.fit(xtrain, ytrain)

    ypreds1 = model1.predict(xtest)
    rmse = np.sqrt(mean_squared_error(ypreds1, ytest))
    mae = mean_absolute_error(ypreds1, ytest)
    r2 = r2_score(ypreds1, ytest)

    print(f"RMSE: {rmse}")
    print(f"MAE: {mae}")
    print(f"R2 Score: {r2}")

    return rmse, mae, r2

In [12]:
model(LinearRegression)

RMSE: 0.5341781674199009
MAE: 0.3567235280165466
R2 Score: 0.6049261266875525


(np.float64(0.5341781674199009), 0.3567235280165466, 0.6049261266875525)

In [13]:
model(Ridge)

RMSE: 0.5341574874435228
MAE: 0.3567654942632783
R2 Score: 0.6042249635521943


(np.float64(0.5341574874435228), 0.3567654942632783, 0.6042249635521943)

In [14]:
from sklearn.linear_model import ElasticNet
model(ElasticNet)

RMSE: 0.8517097561291235
MAE: 0.6035609277606909
R2 Score: -19.34233680923695


(np.float64(0.8517097561291235), 0.6035609277606909, -19.34233680923695)

In [15]:
from sklearn.linear_model import BayesianRidge
model(BayesianRidge)

RMSE: 0.5341393991041546
MAE: 0.3568066556225148
R2 Score: 0.6035732263664446


(np.float64(0.5341393991041546), 0.3568066556225148, 0.6035732263664446)

In [16]:
from sklearn.linear_model import HuberRegressor
model(HuberRegressor)

RMSE: 0.5959675795110201
MAE: 0.29722553296585047
R2 Score: 0.6806995929847693


(np.float64(0.5959675795110201), 0.29722553296585047, 0.6806995929847693)

In [17]:
from sklearn.ensemble import GradientBoostingRegressor
model(GradientBoostingRegressor)

RMSE: 0.41432051013842464
MAE: 0.22255259153055534
R2 Score: 0.7705404796439168


(np.float64(0.41432051013842464), 0.22255259153055534, 0.7705404796439168)

In [18]:
from sklearn.ensemble import HistGradientBoostingRegressor
model(HistGradientBoostingRegressor)

RMSE: 0.4125426359810492
MAE: 0.23012073256103982
R2 Score: 0.7844119638219904


(np.float64(0.4125426359810492), 0.23012073256103982, 0.7844119638219904)

In [19]:
from sklearn.svm import SVR
model(SVR)

RMSE: 0.4260439712644698
MAE: 0.22500264875874137
R2 Score: 0.7600190401121613


(np.float64(0.4260439712644698), 0.22500264875874137, 0.7600190401121613)

In [20]:
from sklearn.tree import DecisionTreeRegressor
model(DecisionTreeRegressor)

RMSE: 0.505679254653849
MAE: 0.23336154493480182
R2 Score: 0.7202383676131415


(np.float64(0.505679254653849), 0.23336154493480182, 0.7202383676131415)

In [21]:
from sklearn.neighbors import KNeighborsRegressor
model(KNeighborsRegressor)

RMSE: 0.48678886699334634
MAE: 0.3050491968692186
R2 Score: 0.6736280326218784


(np.float64(0.48678886699334634), 0.3050491968692186, 0.6736280326218784)

In [22]:
from sklearn.gaussian_process import GaussianProcessRegressor
model(GaussianProcessRegressor)

RMSE: 53.9762766776072
MAE: 19.74355529204885
R2 Score: -0.0005076753942947487


(np.float64(53.9762766776072), 19.74355529204885, -0.0005076753942947487)

In [23]:
from sklearn.linear_model import ARDRegression
model(ARDRegression)

RMSE: 0.5361787908926874
MAE: 0.3571852245018066
R2 Score: 0.6026581132195609


(np.float64(0.5361787908926874), 0.3571852245018066, 0.6026581132195609)

## Deep Learning Approach

In [24]:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(xtrain.shape[1],)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(
    loss = tf.keras.losses.Huber(),
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics = ['mae']
)

es = tf.keras.callbacks.EarlyStopping(patience=10,restore_best_weights=True)

history = model.fit(xtrain, ytrain, epochs=200, verbose=1,validation_split=0.2, callbacks=[es])
evaluation = model.evaluate(xtest, ytest)
print(evaluation)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 28ms/step - loss: 0.1987 - mae: 0.4821 - val_loss: 0.1041 - val_mae: 0.3035
Epoch 2/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.1170 - mae: 0.3150 - val_loss: 0.0748 - val_mae: 0.2201
Epoch 3/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0920 - mae: 0.2691 - val_loss: 0.0697 - val_mae: 0.2572
Epoch 4/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0853 - mae: 0.2566 - val_loss: 0.0690 - val_mae: 0.2386
Epoch 5/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0813 - mae: 0.2530 - val_loss: 0.0755 - val_mae: 0.2500
Epoch 6/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0698 - mae: 0.2260 - val_loss: 0.0661 - val_mae: 0.2339
Epoch 7/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - 

In [25]:
ypreds = model.predict(xtest)
ypreds = ypreds.flatten()
r2 = r2_score(ypreds, ytest)
print(r2)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
0.7363235608591918


## Hyperparameter Tuning for Deep Learning model

In [27]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [28]:
import tensorflow as tf
from kerastuner import HyperModel, RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters

class RegressionHyperModel(HyperModel):
    def build(self, hp):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(
            units=hp.Int('units_1', min_value=64, max_value=512, step=64),
            activation='relu',
            input_shape=(xtrain.shape[1],)
        ))
        model.add(tf.keras.layers.Dropout(rate=hp.Float('dropout_1', min_value=0.2, max_value=0.5, step=0.1)))
        model.add(tf.keras.layers.Dense(
            units=hp.Int('units_2', min_value=32, max_value=256, step=32),
            activation='relu'
        ))
        model.add(tf.keras.layers.Dropout(rate=hp.Float('dropout_2', min_value=0.2, max_value=0.5, step=0.1)))
        model.add(tf.keras.layers.Dense(1))

        model.compile(
            loss=tf.keras.losses.Huber(),
            optimizer=tf.keras.optimizers.Adam(
                learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
            ),
            metrics=['mae']
        )

        return model

hypermodel = RegressionHyperModel()

tuner = RandomSearch(
    hypermodel,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=3,
    directory='my_dir',
    project_name='insurance_regression_tuning'
)

tuner.search(xtrain, ytrain, epochs=100, validation_split=0.2, verbose=1)

best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

best_model = tuner.hypermodel.build(best_hyperparameters)

best_model.fit(xtrain, ytrain, epochs=100, validation_split=0.2, verbose=1)

evaluation = best_model.evaluate(xtest, ytest)
print(f"Test Loss: {evaluation[0]}, Test MAE: {evaluation[1]}")

ypreds = best_model.predict(xtest)
ypreds = ypreds.flatten()
r2 = r2_score(ytest, ypreds)
print(f"R² Score: {r2}")

Trial 10 Complete [00h 01m 32s]
val_loss: 0.058535912384589515

Best val_loss So Far: 0.05720652391513189
Total elapsed time: 00h 15m 57s
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.2366 - mae: 0.5040 - val_loss: 0.0799 - val_mae: 0.2831
Epoch 2/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0971 - mae: 0.2925 - val_loss: 0.0713 - val_mae: 0.2178
Epoch 3/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0802 - mae: 0.2552 - val_loss: 0.0654 - val_mae: 0.2500
Epoch 4/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0884 - mae: 0.2724 - val_loss: 0.0745 - val_mae: 0.2637
Epoch 5/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.1090 - mae: 0.3107 - val_loss: 0.0635 - val_mae: 0.2018
Epoch 6/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0808 - mae: 0.2583 - val_loss: 0.0709 - val_mae: 0.2611
Epoch 7/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0946 - mae

In [29]:
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters:")
for key, value in best_hyperparameters.values.items():
    print(f"{key}: {value}")

Best Hyperparameters:
units_1: 448
dropout_1: 0.4
units_2: 64
dropout_2: 0.2
learning_rate: 0.007933730389078075


In [31]:
best_model = tuner.hypermodel.build(best_hyperparameters)
best_model.fit(xtrain, ytrain, epochs=100, validation_split=0.2, verbose=1)

evaluation = best_model.evaluate(xtest, ytest)
print(f"Test Loss: {evaluation[0]}, Test MAE: {evaluation[1]}")

ypreds = best_model.predict(xtest)
ypreds = ypreds.flatten()
r2 = r2_score(ytest, ypreds)
print(f"R² Score: {r2}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.2072 - mae: 0.4688 - val_loss: 0.1008 - val_mae: 0.3308
Epoch 2/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.1159 - mae: 0.3283 - val_loss: 0.1001 - val_mae: 0.2838
Epoch 3/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.1072 - mae: 0.2981 - val_loss: 0.0932 - val_mae: 0.2802
Epoch 4/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0954 - mae: 0.2747 - val_loss: 0.0675 - val_mae: 0.2033
Epoch 5/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0794 - mae: 0.2461 - val_loss: 0.0713 - val_mae: 0.2502
Epoch 6/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0944 - mae: 0.2687 - val_loss: 0.0653 - val_mae: 0.2577
Epoch 7/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 