<a href="https://colab.research.google.com/github/sona5kyyy/PROJECTEXPO/blob/main/MedicalInsurancepp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the insurance dataset (comma-separated) into a DataFrame
data = pd.read_csv('/content/insurance.csv', delimiter=',')

# Show the column summary and the per-column count of missing values.
# DataFrame.info() writes its report itself and returns None, so it must not
# be wrapped in print() -- doing so emits a stray "None" line after the report.
print("Data Information:")
data.info()
print("\nMissing Values:")
print(data.isnull().sum())

# Replace each text column ('gender', 'smoker', 'region') with integer codes.
# One encoder per column is kept at module level so the same mapping can be
# applied later to user-supplied values.
le_gender, le_smoker, le_region = LabelEncoder(), LabelEncoder(), LabelEncoder()

for column, encoder in (('gender', le_gender), ('smoker', le_smoker), ('region', le_region)):
    data[column] = encoder.fit_transform(data[column])

# Separate the predictors (X) from the target column (y)
feature_columns = ['age', 'gender', 'bmi', 'children', 'smoker', 'region']
X, y = data[feature_columns], data['charges']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a linear regression on the scaled training features
# (fit() returns the estimator itself, so construction and fitting are chained)
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict charges for the held-out test rows
y_pred = model.predict(X_test_scaled)

# Put the model's predictions next to the true charges, row by row.
# The target values are taken as a plain array so the table gets a fresh
# 0..n-1 index instead of the test rows' original index labels.
actual_prices = y_test.to_numpy()
predicted_data = pd.DataFrame(
    {'Predicted Insurance Price': y_pred, 'Actual Insurance Price': actual_prices}
)

print("\nPredicted vs Actual Insurance Prices for Test Data:")
print(predicted_data)

# Optional: score the test-set predictions (mean squared error and R^2)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"\nMean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

# Predict the insurance price for a single new person (custom input)
def predict_new_price(age, gender, bmi, children, smoker, region):
    """Return the model's predicted charge for one person.

    The categorical arguments are converted with the module-level encoders
    (``le_gender``, ``le_smoker``, ``le_region``), the resulting one-row frame
    is scaled with ``scaler``, and the scaled row is passed to ``model``.

    Args:
        age: Value for the 'age' feature.
        gender: Raw label for the 'gender' feature, as accepted by ``le_gender``.
        bmi: Value for the 'bmi' feature.
        children: Value for the 'children' feature.
        smoker: Raw label for the 'smoker' feature, as accepted by ``le_smoker``.
        region: Raw label for the 'region' feature, as accepted by ``le_region``.

    Returns:
        The first (and only) element of ``model.predict`` for the new row.

    Note:
        Labels are handed to the encoders unchanged; presumably a label that
        was not present when the encoders were fitted makes ``transform``
        raise -- confirm against the LabelEncoder documentation.
    """
    # Encode the three labels in the order gender, smoker, region
    gender_code, smoker_code, region_code = (
        encoder.transform([label])[0]
        for encoder, label in (
            (le_gender, gender),
            (le_smoker, smoker),
            (le_region, region),
        )
    )

    # Single-row frame whose columns follow the order used for training
    features = pd.DataFrame({
        'age': [age],
        'gender': [gender_code],
        'bmi': [bmi],
        'children': [children],
        'smoker': [smoker_code],
        'region': [region_code],
    })

    # Apply the already-fitted scaler, then take the lone prediction
    return model.predict(scaler.transform(features))[0]

# Example usage: describe one hypothetical person and price their policy
age, gender, bmi = 35, 'male', 24.0
children, smoker, region = 2, 'yes', 'southeast'

predicted_price = predict_new_price(
    age=age,
    gender=gender,
    bmi=bmi,
    children=children,
    smoker=smoker,
    region=region,
)
print(f"\nPredicted Insurance Price for New Person: ${predicted_price:.2f}")

Data Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   gender    1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB
None

Missing Values:
age         0
gender      0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

Predicted vs Actual Insurance Prices for Test Data:
     Predicted Insurance Price  Actual Insurance Price
0                  8924.407244              9095.06825
1                  7116.295018              5272.17580
2                 36909.013521             29330.98315
3                  9507.874691              9301.89355
4