### Using TensorFlow to work on a real regression problem from Kaggle

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

In [None]:
# Load the medical cost personal dataset from the local data directory.
insurance = pd.read_csv(filepath_or_buffer="./data/insurance.csv")

# Preview the first ten rows as the cell output.
insurance.head(n=10)

In [None]:
from sklearn.model_selection import train_test_split

# Separate the regression target ("charges") from the remaining feature columns.
Y = insurance["charges"]
X = insurance.drop(columns="charges")

# Keep 80% of the rows for training; the fixed seed makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    train_size=0.8,
    random_state=42,
)

# Show the shape of each split as the cell output.
tuple(part.shape for part in (X_train, Y_train, X_test, Y_test))

In [None]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Columns rescaled with MinMaxScaler.
numeric_columns = ['age', 'bmi', 'children']
# Columns expanded into one-hot indicator columns.
categorical_columns = ['sex', 'smoker', 'region']

# Preprocessing pipeline: scale the numeric columns and one-hot encode the
# categorical ones. handle_unknown='ignore' keeps transform() from raising on
# categories that were not present when the encoder was fitted.
ct = make_column_transformer(
    (MinMaxScaler(), numeric_columns),
    (OneHotEncoder(handle_unknown='ignore'), categorical_columns),
)

In [None]:
# Fit the preprocessing on the training split only, so the test split does not
# influence the scaling ranges or the learned categories, then apply the same
# fitted transformer to both splits.
# NOTE(review): `x_train_normal` is lower-case while `X_test_normal` is not;
# the names are kept as-is because later cells refer to them.
x_train_normal = ct.fit(X_train).transform(X_train)
X_test_normal = ct.transform(X_test)

In [None]:
# Build, compile and train a small fully connected regression network.

# Take the number of input features from the preprocessed training matrix
# instead of hard-coding it, so the model stays in sync with the transformer
# output if the set of columns or categories changes.
n_features = x_train_normal.shape[1]

# 1. Create a model using the Sequential API
model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(10, input_shape=[n_features], name='input_layer'),
        tf.keras.layers.Dense(10, name='second_layer'),
        tf.keras.layers.Dense(1, name='output_layer'),  # single regression output
    ],
    name='Insurance_Regression_Model',
)

# 2. Compile the model
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is ignored or rejected by recent Keras optimizers.
model.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=["mae"],
)

# When to stop? EarlyStopping ends training once the training loss has not
# improved for 3 consecutive epochs, so `epochs` below is only an upper bound.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# 3. Fit the model (pass verbose=0 to silence the per-epoch log)
history = model.fit(
    x_train_normal,
    Y_train,
    epochs=200,
    callbacks=[callback],
)

In [None]:
# Predict charges for the held-out test features, which were preprocessed with
# the same fitted transformer as the training data.
Y_pred = model.predict(x=X_test_normal)

In [None]:
# Evaluate the model on the test split using mean absolute error.
# tf.squeeze removes size-1 dimensions from the predictions before they are
# compared with Y_test.
y_pred_flat = tf.squeeze(Y_pred)
mae = tf.metrics.mean_absolute_error(Y_test, y_pred_flat)

# Show the metric as the cell output; Y_train.mean() can be evaluated
# alongside it as a reference value.
mae.numpy()

In [None]:
# Plot every series recorded in the training history against the epoch index.
history_frame = pd.DataFrame(history.history)
history_frame.plot()

plt.xlabel("epochs")
plt.ylabel("loss")
plt.show()