In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder




In [16]:
# Read the insurance records from the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer='insurance.csv')



In [17]:
# One-hot encode the categorical columns 'sex', 'smoker' and 'region'.
# drop_first=True removes the first level of each column, leaving k-1
# indicator columns per k-level category.
data_encoded = pd.get_dummies(
    data,
    drop_first=True,
    columns=['sex', 'smoker', 'region'],
)



In [18]:
# Pull out the target column (y); every remaining column is a feature (X)
y = data_encoded['charges']
X = data_encoded.drop(columns='charges')



In [19]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across re-runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)



In [20]:
# Build the Lasso model and fit it on the training split.
# alpha is the regularization strength; tune it as needed.
lasso_regressor = Lasso(alpha=0.5)
lasso_regressor.fit(X=X_train, y=y_train)



In [21]:
# Generate Lasso predictions for the held-out test features
lasso_y_pred = lasso_regressor.predict(X=X_test)



In [22]:
# Calculate metrics for Lasso regressor.
# RMSE is computed as the square root of the MSE rather than via
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, so passing it raises TypeError on current releases. Taking the root
# explicitly works on every scikit-learn version.
lasso_rmse = mean_squared_error(y_test, lasso_y_pred) ** 0.5
# R^2: proportion of variance in the test targets explained by the model
lasso_r2 = r2_score(y_test, lasso_y_pred)

print("Lasso Regressor")
print("RMSE:", lasso_rmse)
print("R^2 Score:", lasso_r2)



Lasso Regressor
RMSE: 5796.653996522285
R^2 Score: 0.7835653970726435


In [23]:
# Build the Ridge model and fit it on the training split.
# alpha is the regularization strength; tune it as needed.
ridge_regressor = Ridge(alpha=0.5)
ridge_regressor.fit(X=X_train, y=y_train)



In [24]:
# Generate Ridge predictions for the held-out test features
ridge_y_pred = ridge_regressor.predict(X=X_test)



In [25]:
# Calculate metrics for Ridge regressor.
# RMSE is computed as the square root of the MSE rather than via
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, so passing it raises TypeError on current releases. Taking the root
# explicitly works on every scikit-learn version.
ridge_rmse = mean_squared_error(y_test, ridge_y_pred) ** 0.5
# R^2: proportion of variance in the test targets explained by the model
ridge_r2 = r2_score(y_test, ridge_y_pred)

print("\nRidge Regressor")
print("RMSE:", ridge_rmse)
print("R^2 Score:", ridge_r2)


Ridge Regressor
RMSE: 5798.303050236237
R^2 Score: 0.7834422353128189
