# Insurance Charges Prediction
This notebook analyzes the insurance dataset and builds a regression model to predict insurance charges.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Read the insurance CSV (path is relative to the notebook's working directory)
data = pd.read_csv('insurance.csv')

# Preview the top of the table; 5 rows is the pandas default for head()
data.head(n=5)

## Data Preprocessing

In [None]:
# Check for missing values BEFORE encoding: pd.get_dummies (dummy_na=False by
# default) ignores NaN and emits an all-zero dummy row for it, so a check run
# after encoding could never report gaps in 'sex', 'smoker' or 'region'.
print(data.isnull().sum())

# Encode categorical variables. Only columns that are still present are
# encoded, so re-running this cell after `data` has already been encoded is a
# no-op instead of a KeyError.
categorical_cols = [col for col in ['sex', 'smoker', 'region'] if col in data.columns]
if categorical_cols:
    # drop_first=True removes one level per variable, avoiding the redundant
    # dummy column that is fully determined by the others.
    data = pd.get_dummies(data, columns=categorical_cols, drop_first=True)

## Exploratory Data Analysis

In [None]:
# Scatter plots with a fitted regression line: each numeric predictor vs. charges
numeric_predictors = ['age', 'bmi', 'children']
sns.pairplot(data, x_vars=numeric_predictors, y_vars='charges', kind='reg')
plt.show()

# Pairwise correlation matrix of all columns, drawn as an annotated heatmap
correlation_matrix = data.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.show()

## Model Building

In [None]:
# Separate the target column from the predictor columns
y = data['charges']
X = data.drop(columns='charges')

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit() returns the estimator)
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows against their true charges
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics
print(f'Mean Absolute Error: {mae}')
print(f'R-squared: {r2}')

## Conclusion
This notebook explored the insurance dataset and fit a linear regression model to predict charges, evaluated with mean absolute error and R-squared on a 20% hold-out test set. Further improvements can be made by exploring advanced regression techniques or feature engineering.