# Medical Cost
* Building a regression neural network to predict personal medical costs using [this](https://www.kaggle.com/mirichoi0218/insurance) dataset from [Kaggle](https://www.kaggle.com/)

In [None]:
# Imports
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [None]:
# Download the insurance CSV (hosted on GitHub) into a DataFrame and preview it
data_url = 'https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv'
insurence_data = pd.read_csv(data_url)
insurence_data.head(n=10)

## Preprocessing data (normalization and one-hot encoding)

In [None]:
# Preprocessing pipeline: min-max scale the numeric columns and one-hot encode
# the categorical ones
numeric_columns = ['age', 'bmi', 'children']
categorical_columns = ['sex', 'smoker', 'region']

ct = make_column_transformer(
    # rescale each numeric column to the 0-1 range (relative to the data it is fitted on)
    (MinMaxScaler(), numeric_columns),
    # categories not seen during fitting are ignored instead of raising
    (OneHotEncoder(handle_unknown='ignore'), categorical_columns),
)

In [None]:
# Separate the target column ('charges') from the predictor columns
y = insurence_data['charges']
X = insurence_data.drop(columns=['charges'])

# Preview the first rows of each
X.head(3), y.head(3)

In [None]:
# Hold out 20% of the rows as a test set; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Row counts: full data, training split, test split
tuple(map(len, (X, X_train, X_test)))

In [None]:
# Fit the column transformer on the training split only, so the scaler and the
# encoder are fitted without looking at the test data
ct.fit(X_train)

In [None]:
# Apply the already-fitted transformer to both splits (nothing is refitted here)
X_train_normal, X_test_normal = (ct.transform(part) for part in (X_train, X_test))

In [None]:
# Compare the training feature shape before and after the column transformer
tuple(features.shape for features in (X_train, X_train_normal))

In [None]:
# Display the transformed training features produced by ct.transform above
X_train_normal

## Build the model

In [None]:
# Seed TensorFlow's global random generator so repeated runs are comparable
tf.random.set_seed(42)

# Small fully-connected regressor: two hidden ReLU layers and one linear output unit
insurence_model = tf.keras.Sequential([
  tf.keras.layers.Dense(100, activation="relu"),
  tf.keras.layers.Dense(33, activation="relu"),
  tf.keras.layers.Dense(1)
])

# `learning_rate` is the supported argument name; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
insurence_model.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.Adam(learning_rate=0.3),
                        metrics=['mae'])

# Stop training once the validation loss has not improved for `patience` epochs and
# roll back to the best weights seen. patience=0 would halt on the first noisy epoch.
trainingStopCallback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=20,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
)

# The callback monitors 'val_loss', which only exists when validation data is
# supplied; without validation_split it never triggers and all 1000 epochs run.
history = insurence_model.fit(
    X_train_normal,
    y_train,
    epochs=1000,
    verbose=0,
    batch_size=32,
    validation_split=0.2,
    callbacks=[trainingStopCallback],
)

In [None]:
# Score the trained model on the held-out test split (reports the loss and the MAE metric)
insurence_model.evaluate(x=X_test_normal, y=y_test)

In [None]:
# Plot the per-epoch curves recorded by fit(), one line per tracked quantity
ax = pd.DataFrame(history.history).plot()
ax.set_ylabel('loss')
ax.set_xlabel('epochs')