## Imports

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from IPython.display import display

## Load dataset

In [12]:
df = pd.read_csv('./house_price.csv')

# Features and target
X = df[['size', 'bedroom']]
y = df['price']

## Split into train and test sets

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Define models

In [14]:
models = {
    'LinearRegression': LinearRegression(),
    'SGDRegressor_Scaled': make_pipeline(
        StandardScaler(),
        SGDRegressor(max_iter=1000, tol=1e-3, learning_rate='constant', eta0=0.01, random_state=42)
    )
}

## Train, predict, and evaluate

In [15]:
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    mae  = mean_absolute_error(y_test, y_pred)
    mse  = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    
    # Retrieve coefficients and intercept
    if name == 'LinearRegression':
        coef_size, coef_bedroom = model.coef_
        intercept = model.intercept_
    else:
        # pipeline: scaler then regressor
        reg = model.named_steps['sgdregressor']
        coef_size, coef_bedroom = reg.coef_
        intercept = reg.intercept_
    
    results[name] = {
        'Coeff_size':    coef_size,
        'Coeff_bedroom': coef_bedroom,
        'Intercept':     intercept,
        'MAE':           mae,
        'MSE':           mse,
        'RMSE':          rmse,
        'MAPE':          mape
    }

## Display results


In [16]:
results_df = pd.DataFrame(results).T
display(results_df)

Unnamed: 0,Coeff_size,Coeff_bedroom,Intercept,MAE,MSE,RMSE,MAPE
LinearRegression,143.218532,-13512.564426,84763.622522,72334.753604,8610424544.77767,92792.373311,0.174605
SGDRegressor_Scaled,106364.871854,-10466.906599,[322796.80224115663],72290.321812,8638986133.931728,92946.146418,0.174353


## Trade‑offs Between Metrics

- **MAE (Mean Absolute Error)**  
  - Measures the average absolute dollar‑error.  
  - **Less sensitive to outliers**, each error counts equally.

- **MSE (Mean Squared Error)**  
  - Squares each error before averaging.  
  - **Penalizes larger errors more heavily**, so big misses hurt more.

- **RMSE (Root Mean Squared Error)**  
  - The square‑root of MSE, back in the original price units.  
  - **More interpretable** in dollars, but still emphasizes large errors.

- **MAPE (Mean Absolute Percentage Error)**  
  - Averages the absolute percentage errors.  
  - **Gives error as a percentage** of the true price, useful for relative comparisons across different price ranges.
