In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [None]:
# Read the insurance dataset (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="insurance.csv")
# Preview the first five rows as this cell's displayed output.
df.head(n=5)

In [None]:
# Display pandas' default summary statistics for `df` as the cell output.
df.describe()

In [None]:
# Encode each categorical column as integer codes in a new "<column>_idx" column.
# Every column gets its own fitted LabelEncoder, stored in `encoders`, so the
# code -> label mapping (`encoders[col].classes_` / `.inverse_transform`) stays
# available for decoding or for encoding new rows. Refitting a single shared
# encoder would keep only the mapping of the last column it was fitted on.
# NOTE(review): integer codes impose an arbitrary order; if "region" has more
# than two unordered categories, one-hot encoding is likely a better fit for a
# linear model - confirm before changing the feature set.
encoders = {}
for col in ("sex", "smoker", "region"):
    encoders[col] = LabelEncoder()
    df[col + "_idx"] = encoders[col].fit_transform(df[col])

# Keep the original name bound: `le` ends up fitted on "region", as before.
le = encoders["region"]

In [None]:
# Feature matrix (X) built from the listed columns; target vector (y) is "charges".
feature_columns = ["age", "sex_idx", "smoker_idx", "region_idx", "bmi", "children"]
X = df[feature_columns]
y = df["charges"]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Create a linear regression model and fit it on the training split.
# The fit call is left as the last expression so the estimator is displayed.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict the target for the held-out test features.
predictions = model.predict(X=X_test)

In [None]:
# Test-set error metrics: mean squared error, mean absolute error, and the
# square root of the MSE (RMSE).
MSE = mean_squared_error(y_true=y_test, y_pred=predictions)
MAE = mean_absolute_error(y_true=y_test, y_pred=predictions)
RMSE = np.sqrt(MSE)

# Report the two metrics with default six-decimal float formatting.
print("MAE: %f" % MAE)
print("RMSE: %f" % RMSE)

In [None]:
# Residual plot: residual (observed - predicted) against the PREDICTED value.
# Plotting residuals against the observed target instead would show an upward
# trend by construction (the residual contains the observed value), which can be
# misread as model misspecification. Against predictions, a well-specified model
# shows a structureless band around zero.
residuals = y_test - predictions

# Explicit figure/axes interface; plain arrays are passed so the points are
# paired positionally rather than by pandas index.
fig, ax = plt.subplots()
sns.scatterplot(x=np.asarray(predictions), y=np.asarray(residuals), ax=ax)
ax.axhline(y=0, color='r', linestyle='--')  # zero-error reference line
ax.set_xlabel("Predicted Charges")
ax.set_ylabel("Residuals")
ax.set_title("Residual Plot")
plt.show()