## Insurance Premium Prediction

**Import data and required packages**

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
import warnings

ModuleNotFoundError: No module named 'numpy.strings'

In [None]:
# Location of the insurance CSV (raw file hosted on GitHub).
url = 'https://raw.githubusercontent.com/nikhil-xyz/datasets/main/insurance.csv'

# Read the CSV straight from the URL into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Preview the first rows of the raw dataset.
df.head()

Preparing X and y variables

In [None]:
# Target: the 'expenses' column. Features: every other column of `df`.
y = df['expenses']
X = df.drop(columns='expenses')

In [None]:
# Preview the feature frame (all columns of `df` except 'expenses').
X.head()

In [None]:
# Preview the target series ('expenses').
y.head()

Separating numerical and categorical features

In [None]:
# Column dtypes and non-null counts; the dtypes drive the feature split in the next cell.
X.info()

In [None]:
# Partition the feature columns by dtype: object-dtype columns are treated as
# categorical, every other column as numerical.
categorical_features = X.select_dtypes(include='object').columns.tolist()
numerical_features = X.select_dtypes(exclude='object').columns.tolist()

In [None]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [None]:
# Numerical columns: fill missing values with the column median, then standardise.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# one-hot encode, then scale without centering (with_mean=False).
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder()),
    ('scaler', StandardScaler(with_mean=False)),
])

In [None]:
# Route each group of columns through its own pipeline; the outputs are
# concatenated in the order listed (numerical first, then categorical).
preprocessor = ColumnTransformer(
    transformers=[
        ('numerical_pipeline', numerical_pipeline, numerical_features),
        ('categorical_pipeline', categorical_pipeline, categorical_features),
    ]
)

In [None]:
# Fit the preprocessing pipelines and encode the feature matrix.
# The input is rebuilt from `df` rather than read from `X`, because this cell
# rebinds `X` to the encoded array: feeding `X` back in would make a second run
# of the cell fail (the transformer selects columns by name).
# NOTE(review): the preprocessor is fitted on all rows before the train/test
# split further down, so the imputation/scaling statistics also see the test
# rows. Consider splitting first and fitting on the training rows only.
X = preprocessor.fit_transform(df.drop(columns=['expenses']))

In [None]:
# Shape of the feature matrix after preprocessing.
X.shape

In [None]:
# Show only the first few encoded rows instead of dumping the whole matrix.
X[:5]

In [None]:
# First row of the preprocessed feature matrix.
X[0]

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=30,
)

In [None]:
# Shapes of the training and test feature matrices.
X_train.shape, X_test.shape

Create an evaluation function that returns all metrics after model training

In [None]:
def evaluate_model(true, predicted):
    """Compute regression metrics for a set of predictions.

    Parameters
    ----------
    true
        Ground-truth target values.
    predicted
        Predicted values, passed alongside ``true`` to each metric function.

    Returns
    -------
    tuple
        ``(mse, mae, r2)``: the results of ``mean_squared_error``,
        ``mean_absolute_error`` and ``r2_score``, in that order.
    """
    return (
        mean_squared_error(true, predicted),
        mean_absolute_error(true, predicted),
        r2_score(true, predicted),
    )

In [None]:
# Seed shared by every stochastic estimator so the comparison is reproducible
# across kernel restarts (same value as the train/test split's random_state).
RANDOM_STATE = 30

# Candidate regressors, keyed by the display name used in the results table.
models = {
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "K-Neighbour Regressor": KNeighborsRegressor(),
    "AdaBoost Regressor": AdaBoostRegressor(random_state=RANDOM_STATE),
    "XGB Regressor": XGBRegressor(random_state=RANDOM_STATE),
    "Gradient Boosting Regressor": GradientBoostingRegressor(random_state=RANDOM_STATE),
    # verbose=0 silences CatBoost's per-iteration training log.
    "Cat Boost Regressor": CatBoostRegressor(random_state=RANDOM_STATE, verbose=0),
}

In [None]:
# Accumulators for the comparison table: model names and their test R2 scores.
model_list, r2_list = [], []

In [None]:
# Fit every candidate model on the training split and score it on the test split.
# The result lists are (re)created here so that re-running this cell does not
# append duplicate rows to the comparison table.
model_list = []
r2_list = []

for model_name, model in models.items():
    model.fit(X_train, y_train)

    # Only held-out predictions are scored.
    y_test_predict = model.predict(X_test)
    mse, mae, r2 = evaluate_model(y_test, y_test_predict)

    print(model_name)
    model_list.append(model_name)
    r2_list.append(r2)

    print("Mean Squared Error : {:.4f}".format(mse))
    print("Mean Absolute Error : {:.4f}".format(mae))
    print("R2 Score : {:.4f}".format(r2))
    print("_______________________________________")

**Result**

In [None]:
# Comparison table of test R2 scores, best model first.
pd.DataFrame(
    {'Model Name': model_list, 'R2_Score': r2_list}
).sort_values(by='R2_Score', ascending=False)

In [None]:
# Refit a gradient-boosting regressor as the final model and report its test R2
# as a percentage.
# NOTE(review): the estimator is hard-coded here; confirm it is still the
# top-ranked model in the results table.
# random_state makes the fitted model, and therefore the printed score, reproducible.
model = GradientBoostingRegressor(random_state=30)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
score = r2_score(y_test, y_pred) * 100
print(" R2 score is {}".format(score))

In [None]:
# Single hand-written sample, using the same column names as the training features.
temp = pd.DataFrame({
    'age': [18],
    'sex': ['male'],
    'bmi': [33.8],
    'children': [0],
    'smoker': ['no'],
    'region': ['southeast'],
})

In [None]:
# Display the raw sample row.
temp

In [None]:
# Encode the sample with the already-fitted preprocessor.
# NOTE(review): this rebinds `temp` from the raw DataFrame to the transformed
# output, so re-running this cell without first re-running the cell that builds
# `temp` will likely fail (the transformer selects columns by name). Consider a
# separate name for the encoded row.
temp = preprocessor.transform(temp)

In [None]:
# Display the sample after preprocessing.
temp

In [None]:
# Predict for the encoded sample; [0] takes the first (only) element of the returned predictions.
model.predict(temp)[0]

In [None]:
# Re-display the first rows of the original DataFrame `df`.
df.head()