In [1]:
%cd ..

d:\ML Projects\Insurance_premium_predictor


In [2]:
import pandas as pd
data = pd.read_csv('Data\\clean_data.csv')

In [3]:
X = data.drop(columns=['Premium Amount'])
y = data['Premium Amount']

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state = 13)

In [5]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [6]:
y_train = scaler.fit_transform(y_train.to_frame())
y_test = scaler.transform(y_test.to_frame())

In [7]:
import numpy as np

In [8]:
y_train = y_train.ravel()
y_test = y_test.ravel()

In [9]:
y_train.shape, y_test.shape

((17500,), (7500,))

In [10]:
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV, SGDRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [11]:
models = {
    "basic Linear regression" : LinearRegression(),
    "Ridge" : RidgeCV(),
    "Lasso" : LassoCV(),
    "ElasticNet" : ElasticNetCV(),
    "SGD Regressor" : SGDRegressor(),
    "Random Forest" : RandomForestRegressor(),
    "Gradient Boosting Regressor" : GradientBoostingRegressor(),
    "Adaboost" : AdaBoostRegressor()     
}

In [12]:
results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    MAE = mean_absolute_error(y_test, y_pred)
    MSE = mean_squared_error(y_test, y_pred)
    R2 = r2_score(y_test, y_pred)
    results[name] = { "MAE" : MAE, "MSE" : MSE, "R2" : R2}
    print(f'{name} model completed')

basic Linear regression model completed
Ridge model completed
Lasso model completed
ElasticNet model completed
SGD Regressor model completed
Random Forest model completed
Gradient Boosting Regressor model completed
Adaboost model completed


In [13]:
metrics = pd.DataFrame(results).T # transpose the data 
metrics

Unnamed: 0,MAE,MSE,R2
basic Linear regression,0.7704033,0.9948264,0.001155515
Ridge,0.7703927,0.9946733,0.001309205
Lasso,0.7703549,0.9945232,0.001459931
ElasticNet,0.7703547,0.9945277,0.001455386
SGD Regressor,1406508000000000.0,1.9782869999999998e+30,-1.9862769999999998e+30
Random Forest,0.7653865,0.9836797,0.01234725
Gradient Boosting Regressor,0.7644163,0.9841225,0.01190267
Adaboost,0.8828412,1.108622,-0.1130997


In [14]:
X.shape

(25000, 33)

# testing with a ANN 

In [15]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, ReLU, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import metrics

In [16]:
regressor = Sequential()
regressor.add(Dense(units = X.shape[1], activation = 'relu'))
regressor.add(Dropout(0.5))
regressor.add(Dense(units = 25, activation = 'relu'))
regressor.add(Dropout(0.3))
regressor.add(Dense(units = 25, activation = 'relu'))
regressor.add(Dropout(0.3))
regressor.add(Dense(units = 25, activation = 'relu'))
regressor.add(Dropout(0.3))
regressor.add(Dense(units = 15, activation = 'relu'))
regressor.add(Dropout(0.2))
regressor.add(Dense(units = 10, activation = 'relu'))
regressor.add(Dropout(0.1))
regressor.add(Dense(units = 5, activation = 'relu'))
regressor.add(Dropout(0.1))
regressor.add(Dense(units = 1, activation = 'linear'))

In [17]:
optimizer = Adam(learning_rate = 0.01)
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=20,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=False,
    start_from_epoch=0,
)
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.01,
    patience=20,
    verbose=0,
    mode="auto",
    min_delta=0.001,
    cooldown=0,
    min_lr=0.00000001,
)

In [18]:
regressor.compile(optimizer = optimizer, loss = 'mean_squared_error', metrics=[
        metrics.MeanAbsoluteError(),
        metrics.RootMeanSquaredError(),
        metrics.R2Score()
    ])

In [19]:
model_history = regressor.fit(X_train,y_train, validation_split = 0.3, batch_size = 10, epochs = 1000, callbacks = [early_stopping, reduce_lr])

Epoch 1/1000
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 119.7611 - mean_absolute_error: 2.2964 - r2_score: -149.6378 - root_mean_squared_error: 8.1483 - val_loss: 1.0263 - val_mean_absolute_error: 0.7714 - val_r2_score: -0.0057 - val_root_mean_squared_error: 1.0131 - learning_rate: 0.0100
Epoch 2/1000
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.0913 - mean_absolute_error: 0.7881 - r2_score: -0.0780 - root_mean_squared_error: 1.0443 - val_loss: 1.0243 - val_mean_absolute_error: 0.7739 - val_r2_score: -0.0038 - val_root_mean_squared_error: 1.0121 - learning_rate: 0.0100
Epoch 3/1000
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.0423 - mean_absolute_error: 0.7789 - r2_score: -0.0418 - root_mean_squared_error: 1.0206 - val_loss: 1.0213 - val_mean_absolute_error: 0.7802 - val_r2_score: -8.4937e-04 - val_root_mean_squared_error: 1.0106 - learning_rate: 0.0100
Epoch 4/