# Medical Insurance Cost Prediction

## Import basic libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

## Get data

In [2]:
insurance = pd.read_csv("https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv")
insurance.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


## Import libraries for variable transformation

In [3]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

## Data preprocessing

In [4]:
ct = make_column_transformer(
    (MinMaxScaler(), ['age', 'bmi', 'children', ]),
    (OneHotEncoder(), ['sex', 'smoker', 'region'])
)

X = insurance.drop('charges', axis=1)
y = insurance['charges']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

ct.fit(X_train)

X_train_normal = ct.transform(X_train)
X_test_normal = ct.transform(X_test)

In [5]:
X_train = pd.DataFrame(X_train)
X_train_normal = pd.DataFrame(X_train_normal)

In [6]:
X_test_normal = pd.DataFrame(X_test_normal)

In [7]:
X_train.iloc[0,:], X_train_normal.iloc[0,:]

(age                46
 sex            female
 bmi             19.95
 children            2
 smoker             no
 region      northwest
 Name: 560, dtype: object, 0     0.608696
 1     0.107345
 2     0.400000
 3     1.000000
 4     0.000000
 5     1.000000
 6     0.000000
 7     0.000000
 8     1.000000
 9     0.000000
 10    0.000000
 Name: 0, dtype: float64)

In [8]:
X_train.shape, X_train_normal.shape

((1070, 6), (1070, 11))

## Modeling

In [9]:
tf.random.set_seed(42)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

model.compile(loss=tf.keras.losses.mae,
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['mae'])

model.fit(X_train_normal, y_train, epochs=200, verbose=0)

<keras.callbacks.History at 0x7f2333626670>

## Evaluate model

In [10]:
model.evaluate(X_test_normal, y_test)



[3180.362060546875, 3180.362060546875]

## Save model

In [10]:
model.save('best_model')