In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Step 1: Import the data
!wget https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv
dataset = pd.read_csv('insurance.csv')

# Step 2: Convert categorical columns into numeric values using one-hot encoding
dataset = pd.get_dummies(dataset, columns=['sex', 'smoker', 'region'], drop_first=True)

# Step 3: Split the dataset into training and testing sets (80% train, 20% test)
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

# Step 4: Separate features (X) and target labels (y) from the datasets
train_labels = train_dataset.pop('expenses')
test_labels = test_dataset.pop('expenses')

# Step 5: Create and train the Linear Regression model
model = LinearRegression()
model.fit(train_dataset, train_labels)

# Step 6: Evaluate the model on the test dataset
predictions = model.predict(test_dataset)
mae = mean_absolute_error(test_labels, predictions)

# Output the Mean Absolute Error (MAE)
print("Mean Absolute Error:", mae)

# Step 7: Visualize the predictions vs actual values
plt.figure(figsize=(10, 6))
plt.scatter(test_labels, predictions)

# Reference line y = x ("perfect prediction"). Both endpoints must use the
# same value on each axis; using max(test_labels) for x and max(predictions)
# for y would tilt the line away from the true diagonal.
diag_min = min(0, min(test_labels), min(predictions))
diag_max = max(max(test_labels), max(predictions))
plt.plot([diag_min, diag_max], [diag_min, diag_max], color='red', lw=2)

plt.xlabel("Actual Expenses")
plt.ylabel("Predicted Expenses")
plt.title("Actual vs Predicted Healthcare Expenses")
plt.show()

# --- The testing cell below ---
# RUN THIS CELL TO TEST YOUR MODEL.
# Test model by checking how well the model generalizes using the test set.
#
# NOTE: this cell previously called `model.evaluate(...)`, a Keras-style API.
# `model` is a scikit-learn LinearRegression, which has no `evaluate` method,
# so that call raised AttributeError. The mean absolute error is instead
# computed from the model's predictions on the held-out test set.

# Re-imported here so the cell can be run on its own.
from sklearn.metrics import mean_absolute_error

# Predict once; the same array feeds both the metric and the plot below.
# flatten() guarantees a 1-D array regardless of the predictor's output shape.
test_predictions = model.predict(test_dataset).flatten()

# Evaluate the model
mae = mean_absolute_error(test_labels, test_predictions)

print("Testing set Mean Abs Error: {:5.2f} expenses".format(mae))

if mae < 3500:
    print("You passed the challenge. Great job!")
else:
    print("The Mean Abs Error must be less than 3500. Keep trying.")

# Plot predictions against the true values on equal-scaled axes, with the
# y = x diagonal as the "perfect prediction" reference.
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True values (expenses)')
plt.ylabel('Predictions (expenses)')
lims = [0, 50000]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)
