In [44]:
# Step 1: Import the required libraries
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error

In [21]:
# Step 2: Load the dataset into a pandas dataframe
data = pd.read_csv('insurance.csv')
data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [22]:
# Step 3: Preprocess the data
# Separate the input features and target variable
X = data.drop('charges', axis=1)
y = data['charges']

In [23]:
# One-hot encode the categorical features
categorical_features = ['sex', 'smoker', 'region']
categorical_transformer = OneHotEncoder()
numerical_features = ['age', 'bmi', 'children']
numerical_transformer = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numerical_transformer, numerical_features)
    ])

X = preprocessor.fit_transform(X)

In [24]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [25]:
# Step 4: Build the neural network model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, input_shape=(X_train.shape[1],), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(32, activation='relu',kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(1)
])

In [26]:
# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [35]:
# Step 5: Train the model
history = model.fit(X_train, y_train,epochs=100, verbose=0)

In [36]:
y_pred=model.predict(X_test)



In [37]:
# Step 6: Evaluate the model's performance
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {test_loss:.2f}, Test MAE: {test_mae:.2f}')

Test loss: 32803534.00, Test MAE: 4048.89


In [41]:
r2_score(y_test,y_pred)

0.7887037464933656

In [43]:
mean_squared_error(y_test,y_pred)

32803475.32586919

In [14]:

# Step 7: Use the trained model to make predictions on new data
new_data = pd.DataFrame({
    'age': [30],
    'sex': ['male'],
    'bmi': [25.5],
    'children': [2],
    'smoker': ['no'],
    'region': ['northwest']
})
new_data = preprocessor.transform(new_data)
prediction = model.predict(new_data)[0][0]
print('Predicted charges:', prediction)


Predicted charges: 4443.7354
