## **IMPORTING MODULES**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the insurance dataset from the Kaggle input directory and preview the
# first five rows (five is what a bare head() call shows by default).
data = pd.read_csv(filepath_or_buffer="../input/insurance/insurance.csv")
data.head(n=5)

## **EXPLORATORY DATA ANALYSIS**

In [None]:
# (rows, columns) of the dataset as loaded, before the outlier filters further down.
data.shape

In [None]:
# Descriptive statistics per column; with pandas defaults on a mixed-type frame
# this summarises the numeric columns only.
data.describe()

In [None]:
# Inspect the categorical `region` column: which labels occur and how often.
# A notebook cell only echoes its final expression, so the array of distinct
# labels is printed explicitly; as a bare expression on the first line it was
# computed and then silently discarded.
print(data['region'].unique())
data['region'].value_counts()

## **FINDING OUTLIERS**

In [None]:
# Box plot of the target column to eyeball extreme charge values.
plt.boxplot(data.charges)
plt.show()

In [None]:
# Box plot of the age column to check for outlying ages.
plt.boxplot(data.age)
plt.show()

In [None]:
# Box plot of BMI before any filtering, to see where its outliers start.
plt.boxplot(data.bmi)
plt.show()

In [None]:
# Keep only the rows whose BMI is strictly below 49, then preview the result.
data = data[data.bmi < 49]
data.head(n=5)

In [None]:
# Re-draw the BMI box plot to check the effect of the BMI filter.
plt.boxplot(data.bmi)
plt.show()

In [None]:
# Shape of the charges distribution: asymmetry (skew) and tail weight (kurtosis).
print("skeweness:", data.charges.skew())
print("kurtosis:", data.charges.kurt())

In [None]:
# Keep only the rows whose charges are strictly below 50000, then preview.
data = data[data.charges < 50000]
data.head(n=5)

In [None]:
# Re-draw the charges box plot to check the effect of the charges filter.
plt.boxplot(data.charges)
plt.show()

In [None]:
# (rows, columns) after the BMI and charges filters above; compare with the raw
# shape to see how many rows were removed.
data.shape

## **CHECKING FOR NULL VALUES**

In [None]:
# Number of missing values in each column (summing the boolean mask down the rows).
data.isnull().sum(axis=0)

## **FEATURE ENGINEERING**

In [None]:
# Replace the four region labels with integer codes.
# Series.map turns any label that is missing from the dict into NaN.
change_region = {
    'southwest': 0,
    'southeast': 1,
    'northwest': 2,
    'northeast': 3,
}
data['region'] = data['region'].map(change_region)
data.head(n=5)

In [None]:
# Binary-encode the smoker flag: non-smoker -> 0, smoker -> 1.
change_smoker = {
    'no': 0,
    'yes': 1,
}
data['smoker'] = data['smoker'].map(change_smoker)
data.head(n=5)

In [None]:
# Binary-encode sex: male -> 0, female -> 1.
change_sex = {
    'male': 0,
    'female': 1,
}
data['sex'] = data['sex'].map(change_sex)
data.head(n=5)

## **FINDING PREDICTIVE VARIABLES**

In [None]:
# Age against charges: a plain scatter, overlaid with seaborn's fitted
# regression line (drawn in green).
plt.scatter(data.age, data.charges)
sns.regplot(data=data, x="age", y="charges", color="green")
plt.show()

In [None]:
# BMI against charges, with points coloured by the `sex` column.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises a TypeError.
# This also matches the keyword style of the regplot calls in this notebook.
sns.scatterplot(x=data["bmi"], y=data["charges"], hue=data['sex'])
plt.show()

In [None]:
# BMI against charges: a plain scatter, overlaid with seaborn's fitted
# regression line (drawn in blue).
plt.scatter(data.bmi, data.charges)
sns.regplot(data=data, x="bmi", y="charges", color="blue")
plt.show()

In [None]:
# Number of children against charges: a plain scatter, overlaid with seaborn's
# fitted regression line (drawn in blue).
plt.scatter(data.children, data.charges)
sns.regplot(data=data, x="children", y="charges", color="blue")
plt.show()

In [None]:
# BMI against charges, with points coloured by the `smoker` column.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises a TypeError.
# This also matches the keyword style of the regplot calls in this notebook.
sns.scatterplot(x=data["bmi"], y=data["charges"], hue=data['smoker'])
plt.show()

In [None]:
# Region code against charges: a plain scatter, overlaid with seaborn's fitted
# regression line (drawn in blue).
plt.scatter(data.region, data.charges)
sns.regplot(data=data, x="region", y="charges", color="blue")
plt.show()

## **FITTING LINEAR MODEL AND PREDICTING CHARGES**

In [None]:
# Feature matrix: every column except the target. Target vector: the charges.
X = data.drop(columns=["charges"])
y = data.charges

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. A fixed seed makes the shuffle, and
# therefore every score computed from this split, reproducible between runs;
# without it the reported result changes each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares regression fitted on the training split.
# The fit call stays as the cell's final expression so the estimator is echoed.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predicted charges for the held-out rows.
# NOTE(review): y_pred is not used by any of the cells visible here.
y_pred=model.predict(X_test)

In [None]:
# LinearRegression.score returns the coefficient of determination (R^2) of the
# predictions on the given data, not a classification accuracy, so the printed
# label says so. The variable keeps its original name in case other cells use it.
accuracy = model.score(X_test, y_test)
print("model R^2 score (%):", accuracy * 100)

In [None]:
# Fitted parameters: the bias term and one weight per feature column, in the
# column order of the training frame.
intercept, coef = model.intercept_, model.coef_
print("Intercept:", intercept)
print("Coef:", coef)