# Linear Regression Health Costs Calculator
This notebook trains a regression model (a small feed-forward neural network) to predict healthcare costs from personal attributes such as sex, smoking status, and region.

In [None]:
# 1. Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [None]:
# 2. Load data
# Read the insurance CSV straight from its raw GitHub URL into a DataFrame.
url = 'https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv'
df = pd.read_csv(filepath_or_buffer=url)

# Keep the preview as the cell's final expression so the notebook renders it.
df.head(n=5)

In [None]:
# 3. Preprocess data
# Binary text columns are encoded as 0/1 integers via explicit lookup tables.
binary_encodings = {
    'sex': {'male': 0, 'female': 1},
    'smoker': {'yes': 1, 'no': 0},
}
for column, codes in binary_encodings.items():
    df[column] = df[column].map(codes)

# One-hot encode region; drop_first keeps k-1 indicator columns for k levels.
df = pd.get_dummies(data=df, columns=['region'], drop_first=True)

# Split into features and labels
y = df['charges']
X = df.drop(columns='charges')

# Train-test split: hold out 20% of the rows, seeded so the split is repeatable.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
train_dataset, test_dataset, train_labels, test_labels = splits

# Scale the data: the scaler is fit on the training rows only and then applied
# to both splits, so no statistics from the test rows reach the training data.
scaler = StandardScaler().fit(train_dataset)
train_dataset = scaler.transform(train_dataset)
test_dataset = scaler.transform(test_dataset)

In [None]:
# 4. Build the model
# Declare the input width up front so the model is built as soon as it is
# constructed. Without an input spec the Sequential model stays unbuilt until
# fit(), and the model.summary() call below then raises
# "ValueError: This model has not yet been built" on TF 2.x Keras (or prints a
# summary without shapes/parameter counts on Keras 3).
n_features = train_dataset.shape[1]

model = tf.keras.Sequential([
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),  # single unit, no activation: the predicted charge
])

# Mean absolute error is used both as the training loss and as the reported metric.
model.compile(optimizer='adam', loss='mae', metrics=['mae'])
model.summary()

In [None]:
# 5. Train the model
# 100 epochs with progress output suppressed; 20% of the training data is
# held out by Keras for validation. The returned History object is kept in
# `history` for later inspection.
fit_options = dict(epochs=100, validation_split=0.2, verbose=0)
history = model.fit(train_dataset, train_labels, **fit_options)

In [None]:
# 6. Evaluate the model
# Score the held-out test split; the result unpacks into the loss followed by
# the single compiled metric (MAE).
test_scores = model.evaluate(x=test_dataset, y=test_labels)
loss, mae = test_scores
print(f"Mean Absolute Error: ${mae:.2f}")

In [None]:
# 7. Plot predictions vs true values
# predict() output is flattened to a 1-D array so it lines up with the labels.
predictions = model.predict(test_dataset).flatten()

# Endpoints of the red y = x reference line (perfect predictions fall on it).
upper = max(test_labels)
diagonal = [0, upper]

plt.figure(figsize=(10, 6))
plt.scatter(test_labels, predictions)
plt.plot(diagonal, diagonal, color='red')
plt.title("True Charges vs Predicted Charges")
plt.xlabel("True Values")
plt.ylabel("Predictions")
plt.show()