In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
df = pd.read_csv('medical_cost.csv')
df.head()

Unnamed: 0,Id,age,sex,bmi,children,smoker,region,charges
0,1,19,female,27.9,0,yes,southwest,16884.924
1,2,18,male,33.77,1,no,southeast,1725.5523
2,3,28,male,33.0,3,no,southeast,4449.462
3,4,33,male,22.705,0,no,northwest,21984.47061
4,5,32,male,28.88,0,no,northwest,3866.8552


In [3]:
X = df.drop(columns=['charges'])
y = df['charges']

In [4]:
# Define the preprocessing for numeric and categorical features
numeric_features = ['age', 'bmi', 'children']
categorical_features = ['sex', 'smoker', 'region']

In [5]:
# Create transformers for preprocessing
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [6]:
# Create the preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Preprocess the data
X_preprocessed = preprocessor.fit_transform(X)

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

In [8]:
# Define and train the Linear Regression model
lr_model = Ridge()
lr_model.fit(X_train, y_train)
lr_train_preds = lr_model.predict(X_train)
lr_test_preds = lr_model.predict(X_test)

In [9]:
# Build and train the Feedforward Neural Network model
fnn_model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)  # Single output for regression
])

fnn_model.compile(optimizer='adam', loss='mean_squared_error')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = fnn_model.fit(X_train, y_train, epochs=200, batch_size=32, validation_split=0.2, callbacks=[early_stopping])


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200


Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200


Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200


In [10]:
# Make predictions with the FNN model
fnn_train_preds = fnn_model.predict(X_train).flatten()
fnn_test_preds = fnn_model.predict(X_test).flatten()



In [11]:
# Combine predictions for the training and testing sets
train_preds_combined = np.column_stack((lr_train_preds, fnn_train_preds))
test_preds_combined = np.column_stack((lr_test_preds, fnn_test_preds))

In [12]:
# Train the meta-model (another Linear Regression)
meta_model = LinearRegression()
meta_model.fit(train_preds_combined, y_train)
meta_test_preds = meta_model.predict(test_preds_combined)

In [13]:
# Evaluate the hybrid model
mse = mean_squared_error(y_test, meta_test_preds)
mae = mean_absolute_error(y_test, meta_test_preds)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, meta_test_preds)

print(f'Mean Squared Error (MSE) of Hybrid Model: {mse}')
print(f'Mean Absolute Error (MAE) of Hybrid Model: {mae}')
print(f'Root Mean Squared Error (RMSE) of Hybrid Model: {rmse}')
print(f'R-squared (R²) of Hybrid Model: {r2}')

Mean Squared Error (MSE) of Hybrid Model: 19849718.42841894
Mean Absolute Error (MAE) of Hybrid Model: 2704.5453848092925
Root Mean Squared Error (RMSE) of Hybrid Model: 4455.302282496547
R-squared (R²) of Hybrid Model: 0.8721424758986146


In [15]:
# Function to predict based on user input
def predict_cost(user_input):
    # Convert user input into DataFrame
    user_df = pd.DataFrame([user_input], columns=X.columns)
    
    # Preprocess user input
    user_preprocessed = preprocessor.transform(user_df)
    
    # Get predictions from base models
    lr_pred = lr_model.predict(user_preprocessed)
    fnn_pred = fnn_model.predict(user_preprocessed).flatten()
    
    # Combine base model predictions
    combined_pred = np.column_stack((lr_pred, fnn_pred))
    
    # Predict using the meta-model
    final_pred = meta_model.predict(combined_pred)
    
    return final_pred[0]

# Ask user for input and predict
print("Enter feature values for prediction:")
age = float(input("Age: "))
bmi = float(input("BMI: "))
children = int(input("Number of Children: "))
sex = input("Sex (male/female): ")
smoker = input("Smoker (yes/no): ")
region = input("Region (northeast/northwest/southeast/southwest): ")

user_input = {
    'age': age,
    'bmi': bmi,
    'children': children,
    'sex': sex,
    'smoker': smoker,
    'region': region
}
predicted_cost = predict_cost(user_input)
print(f"Predicted Insurance Cost: ${predicted_cost:.2f}")

Enter feature values for prediction:
Age: 24
BMI: 23
Number of Children: 0
Sex (male/female): male
Smoker (yes/no): no
Region (northeast/northwest/southeast/southwest): southeast
Predicted Insurance Cost: $3444.77
