In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error,mean_absolute_error


In [2]:
# Read the insurance dataset (the path looks like a Colab runtime location)
# and preview the first five rows.
df = pd.read_csv('/content/sample_data/insurance.csv')
df.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [3]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [4]:
# Count missing values per column (isna is the canonical name of isnull).
df.isna().sum()

Unnamed: 0,0
age,0
sex,0
bmi,0
children,0
smoker,0
region,0
charges,0


In [5]:
# (rows, columns) of the raw frame.
df.shape

(1338, 7)

In [6]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [7]:
# One-hot encode the categorical columns. drop_first=True drops one level per
# category, so each original column is represented by (levels - 1) indicators.
df = pd.get_dummies(data=df, drop_first=True)
df.head(n=5)

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
0,19,27.9,0,16884.924,False,True,False,False,True
1,18,33.77,1,1725.5523,True,False,False,True,False
2,28,33.0,3,4449.462,True,False,False,True,False
3,33,22.705,0,21984.47061,True,False,True,False,False
4,32,28.88,0,3866.8552,True,False,True,False,False


In [8]:
# Target is the charges column; the features are every remaining column.
y = df['charges']
X = df.drop(columns='charges')

In [9]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Shapes of the four splits, in the order X_train, X_test, y_train, y_test.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((1070, 8), (268, 8), (1070,), (268,))

In [11]:
# Standardise the features: learn the scaling statistics from the training rows
# only, then apply that same transformation to both splits.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# first re-running the split would re-fit the scaler on already-scaled data.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [12]:
# Model creation: start from an empty Sequential container.
model=Sequential()


In [14]:
# Build the regression network.
#
# The model is (re)created in this cell instead of calling model.add() on an
# existing object: with model.add(), every re-run of the cell would append
# three more layers to the same model.
#
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable (same seed value as the train/test split).
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Explicit Input object replaces the deprecated `input_shape=` argument
    # on the first Dense layer; one input per feature column.
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    # Single linear output unit: the predicted charge.
    Dense(1),
])

In [15]:
# Configure training: Adam optimiser, mean-squared-error loss, and mean
# absolute error reported as an additional metric.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae'],
)

In [16]:
# Train for 100 epochs in mini-batches of 32. validation_split=0.2 sets aside
# the last 20% of the training rows (taken before shuffling) for validation.
# The returned History is kept so the per-epoch loss/metric curves can be
# inspected later via history.history, and so the cell does not end with a
# bare `<History at 0x...>` repr.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)


Epoch 1/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - loss: 320723872.0000 - mae: 13213.6064 - val_loss: 310365376.0000 - val_mae: 12655.4482
Epoch 2/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 347449760.0000 - mae: 13728.8545 - val_loss: 310358080.0000 - val_mae: 12655.1865
Epoch 3/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 346975712.0000 - mae: 13997.7432 - val_loss: 310341376.0000 - val_mae: 12654.6143
Epoch 4/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 312754144.0000 - mae: 13396.9668 - val_loss: 310302912.0000 - val_mae: 12653.3369
Epoch 5/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 311431360.0000 - mae: 13144.2168 - val_loss: 310216640.0000 - val_mae: 12650.5469
Epoch 6/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 335937888.0000 - mae: 13660.33

<keras.src.callbacks.history.History at 0x7c2835727950>

In [17]:
# Score the trained network on the held-out test rows. evaluate() returns the
# loss followed by the compiled metric (MAE).
loss, mae = model.evaluate(x=X_test, y=y_test)
print(f'Mean Absolute Error: {mae}')


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 27229786.0000 - mae: 3670.1636 
Mean Absolute Error: 3857.278564453125


In [25]:
# Peek at the encoded frame to recall the column names and their order.
df.head(2)

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
0,19,27.9,0,16884.924,False,True,False,False,True
1,18,33.77,1,1725.5523,True,False,False,True,False


In [28]:
# Feature columns, in the order the scaler and model were trained on:
# age, bmi, children, sex_male, smoker_yes,
# region_northwest, region_southeast, region_southwest
new_customer = pd.DataFrame([
    {
        'age': 40,
        'bmi': 27,
        'children': 2,
        'sex_male': 1,
        'smoker_yes': 1,
        'region_northwest': 0,
        'region_southeast': 0,
        'region_southwest': 1,
    }
])

# Apply the scaling fitted on the training data, then run the network on the
# single scaled row; the prediction comes back as a 2-D array.
new_customer_scaled = scaler.transform(new_customer)
prediction = model.predict(new_customer_scaled)
print(f'Predicted Charges: {prediction[0][0]}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Predicted Charges: 29964.89453125
