In [None]:
# Import libraries. You may or may not use all of these.
!pip install -q git+https://github.com/tensorflow/docs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

In [None]:
# Import data
!wget https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv
dataset = pd.read_csv('insurance.csv')

In [None]:
# Show the last five rows of the freshly loaded frame.
print(dataset.tail(n=5))

In [None]:
# One-hot encode `sex`. With an empty prefix and separator the indicator
# columns are named after the category values themselves.
sex_encoding = {'columns': ['sex'], 'prefix': '', 'prefix_sep': ''}
dataset = pd.get_dummies(dataset, **sex_encoding)

In [None]:
# Last five rows, to check the current column layout.
print(dataset.iloc[-5:])

In [None]:
# One-hot encode `smoker`; the empty prefix/separator keeps the bare category
# values as the new column names.
smoker_encoding = {'columns': ['smoker'], 'prefix': '', 'prefix_sep': ''}
dataset = pd.get_dummies(dataset, **smoker_encoding)

In [None]:
# Give the 'no' / 'yes' columns self-explanatory names in a single rename.
smoker_column_names = {'no': 'not_smoker', 'yes': 'smoker'}
dataset = dataset.rename(columns=smoker_column_names)

In [None]:
# Last five rows, to check the renamed columns.
print(dataset.tail(5))

In [None]:
# One-hot encode `region`; the empty prefix/separator keeps the bare category
# values as the new column names.
region_encoding = {'columns': ['region'], 'prefix': '', 'prefix_sep': ''}
dataset = pd.get_dummies(dataset, **region_encoding)

In [None]:
# Last five rows, to check the current column layout.
print(dataset.tail(n=5))

In [None]:
# Convert boolean indicator columns to integer 0/1.
# The cast is selected by dtype rather than by value: a value-based
# replace({True: 1, False: 0}) also matches the numbers 1 and 0 in every
# numeric column (True == 1 in Python) and depends on pandas implicitly
# downcasting the replaced columns back to a numeric dtype.
bool_columns = dataset.select_dtypes(include='bool').columns
dataset = dataset.astype({column: 'int64' for column in bool_columns})

In [None]:
# Last five rows, to check the converted values.
print(dataset.iloc[-5:])

In [None]:
# Split data into a training and test set

# A seeded 80% random sample becomes the training set; every row that was not
# sampled is the test set.
TRAIN_FRACTION = 0.8
SPLIT_SEED = 0

train_dataset = dataset.sample(frac=TRAIN_FRACTION, random_state=SPLIT_SEED)
test_dataset = dataset.drop(index=train_dataset.index)

In [None]:
# Plain-text rendering of the training split.
print(str(train_dataset))

In [None]:
# Plain-text rendering of the test split.
print(str(test_dataset))

In [None]:
# Data visualisation

# Pairwise scatter plots of the listed columns, with KDE curves on the diagonal.
pairplot_columns = ['age', 'bmi', 'children', 'expenses']
sns.pairplot(data=train_dataset[pairplot_columns], diag_kind='kde')

In [None]:
# Data statistics

# Transposed so that each column of the training set becomes one row of the summary.
train_summary = train_dataset.describe()
print(train_summary.T)

In [None]:
# Get label dataset
#
# Later cells pass `train_dataset` / `test_dataset` straight to the model
# (fit / evaluate / predict), so the target column has to be removed from those
# frames themselves. Popping it only from copies would leave `expenses` among
# the model inputs and leak the answer into training and evaluation.
LABEL_COLUMN = 'expenses'

# Labels are read from the full `dataset` by row index rather than popped, so
# this cell gives the same result when it is re-run.
train_labels = dataset.loc[train_dataset.index, LABEL_COLUMN]
test_labels = dataset.loc[test_dataset.index, LABEL_COLUMN]

# errors='ignore' makes the drop a no-op if the column is already gone.
train_dataset = train_dataset.drop(columns=LABEL_COLUMN, errors='ignore')
test_dataset = test_dataset.drop(columns=LABEL_COLUMN, errors='ignore')

# Feature-only copies, kept under their original names.
train_features = train_dataset.copy()
test_features = test_dataset.copy()

In [None]:
# Build the regression model
LABEL_COLUMN = 'expenses'
SEED = 0
LEARNING_RATE = 0.01
MAX_EPOCHS = 500
BATCH_SIZE = 32
VALIDATION_FRACTION = 0.2

# The network must never see the target. Drop it from the frames that are fed
# to fit / evaluate / predict in this and later cells. errors='ignore' makes
# this a no-op when the column has already been removed.
train_dataset = train_dataset.drop(columns=LABEL_COLUMN, errors='ignore')
test_dataset = test_dataset.drop(columns=LABEL_COLUMN, errors='ignore')

# Seed TensorFlow so weight initialisation and shuffling repeat between runs.
tf.random.set_seed(SEED)

train_inputs = np.asarray(train_dataset, dtype='float32')
train_targets = np.asarray(train_labels, dtype='float32')

# Features live on very different scales (e.g. age vs. 0/1 indicators), so
# standardise them inside the model. The statistics come from training rows
# only, and the layer is part of the model, so raw frames can still be passed
# to evaluate() / predict().
normalizer = layers.Normalization(axis=-1)
normalizer.adapt(train_inputs)

# An explicit Input replaces the deprecated `input_dim` argument. The two small
# hidden layers give capacity beyond a single linear unit; tune or remove as
# needed.
model = Sequential([
    keras.Input(shape=(train_inputs.shape[1],)),
    normalizer,
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='linear'),
])

# Compile the model
# The learning rate is raised above Adam's default because the target is in
# the thousands and the default step size approaches it very slowly.
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
              loss='mean_absolute_error',
              metrics=["mean_absolute_error", "mean_squared_error"])

# Stop training when a model stops improving
monitor = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
# Validation rows are held out from the training data. The test set is not
# used for early stopping, so the test metrics below stay an unbiased estimate.
history = model.fit(
    train_inputs, train_targets,
    validation_split=VALIDATION_FRACTION,
    epochs=MAX_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[monitor],
    verbose=1
)

# Evaluate the model on the test set
# evaluate() returns the loss followed by the compiled metrics, in order.
loss, mae, mse = model.evaluate(test_dataset, test_labels, verbose=0)
print(f'Test Mean Absolute Error: {mae:.2f}')
print(f'Test Mean Square Error: {mse:.2f}')

In [None]:
# Compute the predictions in this cell. Nothing above this point defines
# `test_predictions`, so on a fresh kernel (Restart & Run All) the scatter call
# would otherwise raise NameError.
test_predictions = model.predict(test_dataset).flatten()

# True vs. predicted expenses on equal axes; points on the diagonal are
# perfect predictions.
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True values (expenses)')
plt.ylabel('Predictions (expenses)')
lims = [0, 50000]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims,lims)

In [None]:
# Split the full frame into inputs (every column except 'expenses') and a
# one-column frame holding the target.
target_name = 'expenses'
predictors = dataset.drop(columns=[target_name])
response = dataset[target_name].to_frame()

In [None]:
# Plain-text rendering of the predictor frame.
print(str(predictors))

In [None]:
# Plain-text rendering of the response frame.
print(str(response))

In [None]:
# RUN THIS CELL TO TEST YOUR MODEL. DO NOT MODIFY CONTENTS.
# Test model by checking how well the model generalizes using the test set.
# The three-way unpacking relies on the model having been compiled with exactly
# two metrics (mean absolute error, then mean squared error) after the loss.
# NOTE(review): `test_dataset` is passed to the model as-is, so it must hold
# exactly the columns the model was trained on; for this to measure
# generalization it presumably should not include the `expenses` label - verify.
loss, mae, mse = model.evaluate(test_dataset, test_labels, verbose=2)

print("Testing set Mean Abs Error: {:5.2f} expenses".format(mae))

# Pass/fail threshold: mean absolute error below 3500.
if mae < 3500:
  print("You passed the challenge. Great job!")
else:
  print("The Mean Abs Error must be less than 3500. Keep trying.")

# Plot predictions.
# flatten() turns the model output into a 1-D array for the scatter plot.
test_predictions = model.predict(test_dataset).flatten()

# True vs. predicted values on equal axes; the final line draws the diagonal
# on which predictions equal the true values.
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True values (expenses)')
plt.ylabel('Predictions (expenses)')
lims = [0, 50000]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims,lims)
