# 🏥 Medical Insurance Premium Prediction with ANN

This notebook trains an Artificial Neural Network (ANN) to predict insurance premiums.

✅ Only `X` (features) are scaled.
✅ `y` (insurance charges) is kept in original dollars.
✅ Model and scaler are exported for use in Streamlit app.


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import joblib


## 📂 Load Dataset

In [3]:
insurance_df = pd.read_csv('/content/drive/MyDrive/insurance.csv')  # ensure dataset is available
insurance_df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


## 🔄 Encode Categorical Variables

In [4]:
# Encode sex
insurance_df['sex'] = insurance_df['sex'].apply(lambda x: 1 if x == 'male' else 0)

# Encode smoker
insurance_df['smoker'] = insurance_df['smoker'].apply(lambda x: 1 if x == 'yes' else 0)

# One-hot encode region
insurance_df = pd.get_dummies(insurance_df, columns=['region'], drop_first=True)
insurance_df.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges,region_northwest,region_southeast,region_southwest
0,19,0,27.9,0,1,16884.924,False,False,True
1,18,1,33.77,1,0,1725.5523,False,True,False
2,28,1,33.0,3,0,4449.462,False,True,False
3,33,1,22.705,0,0,21984.47061,True,False,False
4,32,1,28.88,0,0,3866.8552,True,False,False


## ✂ Features (X) and Target (y)

In [5]:
X = insurance_df.drop(columns=['charges'])
y = insurance_df['charges']  # keep original values

print("Features:", X.shape)
print("Target:", y.shape)

Features: (1338, 8)
Target: (1338,)


## ⚖ Scale Features (only X)

In [6]:
scaler_x = StandardScaler()
X_scaled = scaler_x.fit_transform(X)

# Save scaler for Streamlit app
joblib.dump(scaler_x, "scaler_x.pkl")

['scaler_x.pkl']

## 🧠 Build ANN Model

In [7]:
ANN_model = Sequential([
    Dense(50, activation='relu', input_shape=(X_scaled.shape[1],)),
    Dense(150, activation='relu'),
    Dropout(0.2),
    Dense(150, activation='relu'),
    Dropout(0.2),
    Dense(50, activation='relu'),
    Dense(1, activation='linear')
])

ANN_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
ANN_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## 🚂 Train ANN

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

history = ANN_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=32,
    verbose=1
)

Epoch 1/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 319654464.0000 - mae: 13221.1406 - val_loss: 322269440.0000 - val_mae: 12925.6416
Epoch 2/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 323052896.0000 - mae: 13221.9062 - val_loss: 296449952.0000 - val_mae: 12004.6445
Epoch 3/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 278256064.0000 - mae: 11610.1436 - val_loss: 145083344.0000 - val_mae: 7377.4487
Epoch 4/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 105617224.0000 - mae: 6980.2534 - val_loss: 70993728.0000 - val_mae: 7114.5122
Epoch 5/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 57906648.0000 - mae: 5895.5264 - val_loss: 50285492.0000 - val_mae: 5805.2124
Epoch 6/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 46253540.0000 - mae: 5319.0029 - val_lo

## 💾 Save Model

In [9]:
ANN_model.save("insurance_model.keras")
print("Model and scaler saved successfully!")

Model and scaler saved successfully!
