In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [None]:
# Location of the insurance dataset. This is an absolute path (it looks like a
# Google Drive mount inside Colab -- confirm); edit DATA_PATH when the notebook
# is run anywhere the file lives somewhere else.
DATA_PATH = "/content/drive/MyDrive/Datasets/insurance.csv"
data = pd.read_csv(DATA_PATH)

# Quick look at the first rows of the loaded frame.
print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly: wrapping it in print() emits an extra "None" line.
data.info()


   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB
None


In [None]:
# Predictor matrix: the "bmi" column, selected with a list of labels so the
# result stays a two-dimensional DataFrame (the layout scikit-learn expects).
X = data.loc[:, ["bmi"]]
# Response: the "charges" column as a one-dimensional Series.
y = data.loc[:, "charges"]


In [None]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
degree = 2  # polynomial order; try 3, 4, ... for a more flexible curve

# include_bias=False leaves the constant column out of the expansion.
poly = PolynomialFeatures(degree=degree, include_bias=False)

# Fit the expansion on the training rows only, then apply that same fitted
# transformer to both splits.
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

print("Original X_train shape:", X_train.shape)
print("Transformed X_train_poly shape:", X_train_poly.shape)


Original X_train shape: (1070, 1)
Transformed X_train_poly shape: (1070, 2)


In [None]:
# Linear regression on the polynomial features. fit() returns the estimator
# itself, so construction and training are chained into one expression.
model = LinearRegression().fit(X_train_poly, y_train)

# One coefficient per expanded feature column, plus the fitted intercept.
print("Model coefficients:", model.coef_)
print("Model intercept:", model.intercept_)


Model coefficients: [1181.27586758  -12.57029044]
Model intercept: -10555.630116326045


In [None]:
# Predict charges for the held-out rows, using the polynomial-expanded test features.
y_pred = model.predict(X_test_poly)


In [None]:
# Error metrics comparing held-out targets with the model's predictions.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # square root of the MSE

# Build the report lines first, then emit them in a single print call.
report_lines = [
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"R² Score: {r2:.4f}",
]
print("\n".join(report_lines))


Mean Squared Error (MSE): 150472750.87
Root Mean Squared Error (RMSE): 12266.73
Mean Absolute Error (MAE): 9823.70
R² Score: 0.0308
