# Medical Insurnce Cost Prediction

Medical Insurance Cost Prediction involves using machine learning to estimate the healthcare costs for individuals based on features 
like age, sex, BMI, number of children, smoking status, and geographic region. By analyzing patterns in historical data, a regression model 
(like Linear Regression) can predict the likely insurance charges. This prediction helps insurance companies assess risk and tailor premiums, 
while allowing individuals to get an estimate of their medical costs based on personal health factors. Features like smoking and BMI tend to have a 
strong impact on the predicted costs.

# IMPORT LIBRARY

In [5]:
# import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# PRE PROCESING

In [6]:
# import data
insurance = pd.read_csv("D:\\Machine learning datasets\\insurance.csv")

In [7]:
insurance.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [8]:
# find information about data
insurance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [9]:
# check null values
insurance.isnull().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [10]:
insurance.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [11]:
# encoding sex column
insurance.replace({'sex':{'male':0,'female':1}}, inplace=True)

3 # encoding 'smoker' column
insurance.replace({'smoker':{'yes':0,'no':1}}, inplace=True)

# encoding 'region' column
insurance.replace({'region':{'southeast':0,'southwest':1,'northeast':2,'northwest':3}}, inplace=True)

  insurance.replace({'sex':{'male':0,'female':1}}, inplace=True)
  insurance.replace({'smoker':{'yes':0,'no':1}}, inplace=True)
  insurance.replace({'region':{'southeast':0,'southwest':1,'northeast':2,'northwest':3}}, inplace=True)


In [12]:
insurance.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,1,27.9,0,0,1,16884.924
1,18,0,33.77,1,1,0,1725.5523
2,28,0,33.0,3,1,0,4449.462
3,33,0,22.705,0,1,3,21984.47061
4,32,0,28.88,0,1,3,3866.8552


In [13]:
X = insurance.iloc[:,[0,1,2,3,4,5]].values
Y = insurance.iloc[:,[6]].values

In [14]:
X

array([[19.  ,  1.  , 27.9 ,  0.  ,  0.  ,  1.  ],
       [18.  ,  0.  , 33.77,  1.  ,  1.  ,  0.  ],
       [28.  ,  0.  , 33.  ,  3.  ,  1.  ,  0.  ],
       ...,
       [18.  ,  1.  , 36.85,  0.  ,  1.  ,  0.  ],
       [21.  ,  1.  , 25.8 ,  0.  ,  1.  ,  1.  ],
       [61.  ,  1.  , 29.07,  0.  ,  0.  ,  3.  ]])

In [15]:
Y

array([[16884.924 ],
       [ 1725.5523],
       [ 4449.462 ],
       ...,
       [ 1629.8335],
       [ 2007.945 ],
       [29141.3603]])

In [16]:
# do train test and split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

In [17]:
# check shapes of data
print(X.shape, X_train.shape, X_test.shape)

(1338, 6) (1070, 6) (268, 6)


In [18]:
regressor = LinearRegression()

In [19]:
regressor.fit(X_train, Y_train)

In [20]:
# predict the data
training_data_prediction =regressor.predict(X_train)

In [21]:
r2_train = metrics.r2_score(Y_train, training_data_prediction)
print('R squared vale : ', r2_train)

R squared vale :  0.751505643411174


In [22]:
test_data_prediction =regressor.predict(X_test)

In [23]:
r2_test = metrics.r2_score(Y_test, test_data_prediction)
print('R squared vale : ', r2_test)

R squared vale :  0.7447273869684077


In [25]:
input_data = (31, 1, 25.74, 0, 1, 0)

# changing input_data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# make prediction
prediction = regressor.predict(input_data_reshaped)

# access the first element of the prediction
print('The insurance cost is USD', prediction[0])

The insurance cost is USD [3760.0805765]


In [28]:
import gradio as gr

In [31]:
# Function to make predictions for new input data
def predict_insurance_cost(age, sex, bmi, children, smoker, region):
    input_data = (age, sex, bmi, children, smoker, region)
    
    # Convert the input data to numpy array
    input_data_as_numpy_array = np.asarray(input_data)
    
    # Reshape the array
    input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
    
    # Make prediction
    prediction = regressor.predict(input_data_reshaped)
    
    # Return the prediction (rounded to two decimal places)
    return f'The estimated insurance cost is USD {prediction[0][0]:.2f}'

# Create the Gradio interface using gr.components instead of gr.inputs
interface = gr.Interface(
    fn=predict_insurance_cost,
    inputs=[
        gr.Number(label="Age"),
        gr.Radio(["Male", "Female"], label="Sex"),  # Encoded as 0 for male, 1 for female
        gr.Number(label="BMI"),
        gr.Number(label="Number of Children"),
        gr.Radio(["Yes", "No"], label="Smoker"),  # Encoded as 0 for yes, 1 for no
        gr.Dropdown(["Southeast", "Southwest", "Northeast", "Northwest"], label="Region"),  # Encoded region
    ],
    outputs="text",
    title="Insurance Cost Prediction",
    description="Enter the details to estimate the insurance cost."
)

# Launch the interface
interface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\gradio\queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\gradio\route_utils.py", line 321, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\gradio\blocks.py", line 1935, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\gradio\blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thre