In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import sys

In [None]:
# Read the insurance CSV from the Kaggle input directory and preview the first rows.
insurance_csv_path = "/kaggle/input/insurance/insurance.csv"
df = pd.read_csv(insurance_csv_path)
df.head()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Check the distribution of each numeric column.
# The histograms are drawn from the raw values. The previous `.diff()` call plotted
# row-to-row differences instead, which describe the ordering of the rows rather than
# the distribution of any variable (the rows are presumably unordered records).
# `DataFrame.hist` creates its own figure, so no separate `plt.figure()` call is
# needed; it would only leave an empty figure behind.
df[['age', 'bmi', 'children', 'charges']].hist(color='m', alpha=0.8, figsize=(10, 7));


In [None]:
# Count the rows that exactly repeat an earlier row.
df.duplicated(keep='first').sum()


In [None]:
# Drop exact duplicate rows in place (the first occurrence is kept), then
# re-count the duplicates to confirm that none remain.
df.drop_duplicates(keep='first', inplace=True)
df.duplicated(keep='first').sum()

In [None]:
# Check correlation between the numeric columns.
# Any non-numeric columns have not been encoded yet at this point (encoding happens in
# a later cell). Since pandas 2.0 `DataFrame.corr()` no longer drops such columns
# silently and raises when it meets strings, so the frame is restricted to numeric
# columns before the correlation matrix is computed.
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, cmap='coolwarm')

In [None]:
# Scatter plot of charges against BMI on an explicitly created 10x5 figure.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=df, x='bmi', y='charges', ax=ax)

In [None]:
# Encode Categorical Data
from sklearn.preprocessing import LabelEncoder

# Replace every text column with integer codes, one independent LabelEncoder per column.
# The dtype test accepts both the legacy `object` dtype and the dedicated string dtype
# that newer pandas versions infer for text; comparing against 'object' alone would skip
# those columns and leave raw strings for the scaler to choke on.
for col in df.columns:
    column = df[col]
    if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
        # fit_transform learns the label set and maps the values in a single pass.
        df[col] = LabelEncoder().fit_transform(column.tolist())

In [None]:
# Preview the first five rows after the categorical columns have been encoded.
df.head(n=5)

In [None]:
# Split and scale the data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Target is `charges`; the features are every other column.
y = df['charges']
X = df.drop(columns='charges')

# Hold out 20 % of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=45
)

# Fit the scaler on the training features only, then apply that same
# transformation to both the training and the test features.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
#Model Training and Testing
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor,GradientBoostingRegressor,RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRFRegressor

In [None]:
# Candidate regressors to compare.
# The tree-based and boosting models draw random numbers while fitting, so they share a
# fixed seed to make the reported scores repeatable from run to run.
MODEL_SEED = 45

lr = LinearRegression()

knn = KNeighborsRegressor(n_neighbors=10)

dt = DecisionTreeRegressor(max_depth=3, random_state=MODEL_SEED)

rf = RandomForestRegressor(max_depth=3, n_estimators=500, random_state=MODEL_SEED)

ada = AdaBoostRegressor(n_estimators=50, learning_rate=.01, random_state=MODEL_SEED)

gbr = GradientBoostingRegressor(max_depth=2, n_estimators=100, learning_rate=.2,
                                random_state=MODEL_SEED)

# (display name, estimator) pairs consumed by the evaluation loop.
# Gradient Boosting was previously constructed but left out of this list, so it was
# never fitted or scored.
regressors = [
    ('Linear Regression', lr),
    ('K Nearest Neighbours', knn),
    ('Decision Tree', dt),
    ('Random Forest', rf),
    ('AdaBoost', ada),
    ('Gradient Boosting', gbr),
]

In [None]:
# Score of each model: R^2 on the held-out test set, expressed as a percentage.
from sklearn.metrics import r2_score

# Figure size for the comparison bar chart; set once, before the first bar is drawn.
plt.rcParams["figure.figsize"] = (10, 8)

for regressor_name, regressor in regressors:

    # Fit regressor to the training set
    regressor.fit(X_train, y_train)

    # Predict on the test set
    y_pred = regressor.predict(X_test)

    # Scale to a percentage first and round afterwards. Rounding R^2 to one decimal
    # before multiplying by 100 collapsed every score onto a multiple of 10
    # (e.g. 0.76 was reported as 80 %).
    accuracy = round(r2_score(y_test, y_pred) * 100, 1)

    # Report the score and add one bar per model to the shared chart
    print('{:s} : {:.0f} %'.format(regressor_name, accuracy))
    plt.bar(regressor_name, accuracy)

# Label the axis so the chart is not mistaken for classification accuracy.
plt.ylabel('R^2 on test set (%)');