### Importing the libraries

In [147]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from xgboost import XGBRegressor

# Show estimators/pipelines as HTML diagrams when they are the output of a cell.
from sklearn import set_config
set_config(display="diagram")

### Loading the dataset & separating the target (dependent) variable

In [148]:
# Read the insurance table, then pull the `charges` column out as the
# regression target `y`; afterwards `df` holds only the predictor columns.
df = pd.read_csv('/content/insurance.csv')
y = df.pop('charges')

### Creating a dataset with the numeric features

In [149]:
# Numeric predictors: only the 64-bit integer and 64-bit float columns of `df`.
# Their column names are later routed to the scaling branch of the preprocessor.
dfn = df.select_dtypes(include=[np.int64, np.float64])

### Creating a dataset with the categorical features

In [150]:
# Categorical predictors: the object-dtype columns of `df`.
# Their column names are later routed to the one-hot-encoding branch.
dfo = df.select_dtypes(include=[object])

### Creating the preprocessing pipeline

In [151]:
# Numeric branch: a one-step pipeline that standardises each column
# (step name 'scaler').
numeric_transformer = Pipeline([('scaler', StandardScaler())])

# Categorical branch: a one-step pipeline that one-hot encodes each column
# (step name 'encode'). handle_unknown='ignore' means a category not seen
# during fit is encoded as all zeros instead of raising at transform time.
categorical_transformer = Pipeline(
    [('encode', OneHotEncoder(handle_unknown='ignore'))]
)


# Route each column group to its branch: numeric columns are scaled,
# object columns are one-hot encoded. Columns listed in neither group are
# dropped (ColumnTransformer's default `remainder`).
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, dfn.columns),
    ('cat', categorical_transformer, dfo.columns),
])

In [152]:
# End-to-end model: the column preprocessor followed by an XGBoost regressor
# trained with the squared-error objective. Keeping both in one Pipeline means
# the preprocessing is re-fitted on whatever data `fit` receives.
regressor = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(objective='reg:squarederror')),
])

### Train/test split

In [153]:
# Hold out 20 % of the rows for testing; the fixed random_state makes the
# split reproducible. The target is passed as a plain NumPy array.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    y.values,
    test_size=0.2,
    random_state=1,
)

### Fitting the model

In [154]:
# Fit the preprocessing steps and the XGBoost model on the training split only.
# As the last expression of the cell, the fitted pipeline is also displayed.
regressor.fit(X=X_train, y=y_train)

### Test-set score (R²)

In [155]:
# Evaluate on the held-out split. For a regressor, `score` is the coefficient
# of determination (R²), not a classification accuracy.
regressor.score(X=X_test, y=y_test)

0.8797596683054297

### Prediction for a single row

In [156]:
# Predict the charge for the row at position 4. Selecting with a list keeps a
# one-row DataFrame, so the column-name-based preprocessor still works.
# NOTE: the row is taken from the full dataset, so it may have been part of
# the training split.
regressor.predict(df.iloc[[4]])[0]

4606.4453

### Cross-validation score & standard deviation

In [157]:
# 10-fold cross-validation of the whole pipeline on the full dataset; the
# preprocessing is re-fitted inside every fold. No `scoring` is given, so the
# estimator's own `score` is used.
# NOTE: for a regressor that score is R², so the values printed under the
# "Accuracy" label below are R² values expressed as percentages.
accuracy = cross_val_score(regressor, df, y, cv=10)
print(f'Accuracy: {accuracy.mean()*100 :.2f} %')
print(f'Standard Deviation: {accuracy.std()*100 :.2f} %')

Accuracy: 85.70 %
Standard Deviation: 4.47 %
