In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the insurance dataset from the working directory and preview its first rows.
source_csv = 'insurance.csv'
df = pd.read_csv(source_csv)
df.head()

In [None]:
# Integer-encode the categorical feature columns so they can be scaled and fed
# to the network. A fresh LabelEncoder is fitted for each column.
# NOTE(review): integer codes impose an arbitrary ordering on nominal columns
# with more than two values (presumably 'region'); one-hot encoding may be a
# better fit — confirm against the data before changing the feature layout.
cat_cols = ['sex', 'smoker', 'region']
for col in cat_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# A bare last expression gives the notebook's rich table display instead of the
# plain-text dump produced by print().
df.head()

In [None]:
# Separate the features from the regression target ('charges').
features = df.drop(columns='charges')
y = df['charges']

# Split BEFORE scaling: fitting the scaler on the full matrix would let the
# validation/test rows influence the mean/std used for training (data leakage).
# Two successive 80/20 splits -> 64% train / 16% validation / 20% test.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same transform to
# every other partition so all inputs share the training statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_train_full = scaler.transform(X_train_full)
# Kept so `X` still names the full scaled feature matrix (now scaled with
# training-set statistics).
X = scaler.transform(features)

In [None]:
# Seed Python, NumPy and TensorFlow RNGs in one call so weight initialisation,
# dropout and batch shuffling are repeatable under Restart & Run All
# (some GPU kernels may still be non-deterministic).
tf.keras.utils.set_random_seed(42)

# Unregularized baseline: two 64-unit ReLU hidden layers followed by a single
# linear output unit for the regression target. No dropout or weight penalty.
overfit_model = Sequential([
    Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='linear'),
])

In [None]:
# Regularized counterpart of the baseline: same 64-64-1 layout, with 10% dropout
# after each hidden layer and an L2 weight penalty on the second hidden layer.
n_features = X_train.shape[1]

reg_model = Sequential()
reg_model.add(Input(shape=(n_features,)))
reg_model.add(Dense(64, activation='relu'))
reg_model.add(Dropout(0.1))
reg_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
reg_model.add(Dropout(0.1))
reg_model.add(Dense(1, activation='linear'))

In [None]:
# Show the layers and parameter counts of the unregularized baseline.
overfit_banner = "\nOverfit Model Summary:"
print(overfit_banner)
overfit_model.summary()

In [None]:
# Show the layers and parameter counts of the regularized network.
reg_banner = "\nRegularized Model Summary:"
print(reg_banner)
reg_model.summary()

In [None]:
# Both networks get the same training setup: Adam optimizer, MSE loss, and
# MAE/MSE tracked as metrics. Arguments are built inside the loop so each model
# receives its own optimizer and metrics list.
for net in (overfit_model, reg_model):
    net.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=['mae', 'mse'],
    )

In [None]:
# Train the unregularized baseline for 20 epochs in mini-batches of 16,
# scoring the validation split after every epoch.
history_overfit = overfit_model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=20,
    verbose=1,
    validation_data=(X_val, y_val),
)

In [None]:
# Train the regularized network with the same schedule as the baseline
# (20 epochs, batch size 16) so the two histories are directly comparable.
history_reg = reg_model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=20,
    verbose=1,
    validation_data=(X_val, y_val),
)

In [None]:
# Validation loss (MSE) per epoch for both networks.
# The figure gets its own full-size axes rather than one slot of a 1x2 grid:
# a notebook renders the figure when the cell finishes, so a reserved second
# panel would stay empty and waste half the width.
fig, ax = plt.subplots(figsize=(7, 6))
ax.plot(history_overfit.history['val_loss'], label='Overfit Val Loss', color='blue', linestyle='--')
ax.plot(history_reg.history['val_loss'], label='Regularized Val Loss', color='brown', linestyle='--')
ax.set_title('Model Loss Comparison (MSE)')
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Squared Error Loss')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Validation MAE per epoch for both networks.
# An explicit figure is created here so the curves never land on whatever axes
# happen to be current (e.g. a still-open loss plot when run outside the inline
# backend), which would overwrite that plot's title and labels.
fig, ax = plt.subplots(figsize=(7, 6))
ax.plot(history_overfit.history['val_mae'], label='Overfit Val MAE', color='blue')
ax.plot(history_reg.history['val_mae'], label='Regularized Val MAE', color='brown')
ax.set_title('Model Mean Absolute Error Comparison')
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Absolute Error')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
def _score_on_test(model):
    """Evaluate `model` on the held-out test split.

    Returns a tuple ``(loss, mae, mse, predictions, r2)`` where the first three
    values come from ``model.evaluate`` (loss followed by the compiled metrics),
    ``predictions`` is the flattened 1-D prediction array, and ``r2`` is the
    coefficient of determination of those predictions against ``y_test``.
    """
    loss, mae, mse = model.evaluate(X_test, y_test, verbose=0)
    predictions = model.predict(X_test, verbose=0).flatten()
    return loss, mae, mse, predictions, r2_score(y_test, predictions)


# Score the baseline first, then the regularized network, on the same test rows.
(loss_overfit, mae_overfit, mse_overfit_eval,
 y_pred_overfit, r2_overfit) = _score_on_test(overfit_model)

(loss_reg, mae_reg, mse_reg_eval,
 y_pred_reg, r2_reg) = _score_on_test(reg_model)

In [None]:
# Side-by-side test metrics for the two networks, emitted as one report.
report_lines = [
    "\nTest Set Performance:",
    f"Overfit Model     - MSE: {mse_overfit_eval:.2f}, MAE: {mae_overfit:.2f}, R2: {r2_overfit:.4f}",
    f"Regularized Model - MSE: {mse_reg_eval:.2f}, MAE: {mae_reg:.2f}, R2: {r2_reg:.4f}",
]
print("\n".join(report_lines))

In [None]:
# Actual vs. predicted charges on the test split for both networks, with the
# y = x diagonal as the perfect-prediction reference.
fig, ax = plt.subplots(figsize=(10, 7))

ax.scatter(y_test, y_pred_overfit, color='skyblue', edgecolors='k', linewidth=0.5,
           label=f'Overfit Model (R2={r2_overfit:.3f})')
ax.scatter(y_test, y_pred_reg, color='sandybrown', edgecolors='k', linewidth=0.5,
           label=f'Regularized Model (R2={r2_reg:.3f})')

# The diagonal spans the observed range of the true target values.
lowest, highest = min(y_test), max(y_test)
ax.plot([lowest, highest], [lowest, highest], color='black', linestyle='--', linewidth=2,
        label='Perfect Prediction')

ax.set_title('Actual vs Predicted Charges (Test Set)')
ax.set_xlabel('Actual Charges ($)')
ax.set_ylabel('Predicted Charges ($)')
ax.legend()
ax.grid(True)
plt.show()