In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [2]:
# Fetch the OpenML dataset named "boston" (version 1) as pandas objects.
boston = fetch_openml(name="boston", version=1, as_frame=True)

# Keep only the rows whose target value is present.
mask = boston.target.notna()
X = boston.data.loc[mask]
y = boston.target.loc[mask]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear-regression baseline: median imputation -> standardisation -> least squares.
pipe = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
        ("lr", LinearRegression()),
    ]
)

# Fit on the training split only, then predict the held-out rows.
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out metrics.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(
    f"Test RMSE: {rmse:.3f}",
    f"Test R^2 : {r2:.3f}",
    f"Test MAE : {mae:.3f}",
    sep="\n",
)

Test RMSE: 4.929
Test R^2 : 0.669
Test MAE : 3.189


In [3]:
## Ridge: tune alpha with 5-fold CV on the training split, then score on the test split
pipe_ridge = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
    ("model", Ridge())
])

# 25 log-spaced candidate penalties between 1e-4 and 1e4.
param_grid = {"model__alpha": np.logspace(-4, 4, 25)}
gs = GridSearchCV(pipe_ridge, param_grid, cv=5, scoring="neg_mean_squared_error", n_jobs=-1)

gs.fit(X_train, y_train)
print("Best alpha:", gs.best_params_)

# Pipeline with the winning alpha (GridSearchCV's refit default refits it on all of X_train).
best_pipe = gs.best_estimator_

# best_score_ is the mean *negated* MSE across the CV folds of the training data;
# it never sees X_test, so it is reported as a CV estimate rather than a test score.
# The square root of the mean fold MSE is only approximately the mean fold RMSE.
print("CV RMSE:", np.sqrt(-gs.best_score_))

# Held-out evaluation. NOTE: this rebinds y_pred, replacing the linear-regression predictions.
y_pred = best_pipe.predict(X_test)

print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R^2:", r2_score(y_test, y_pred))
print("MAE:", mean_absolute_error(y_test, y_pred))

Best alpha: {'model__alpha': np.float64(2.154434690031882)}
Test RMSE: 4.862826490414947
RMSE: 4.9333552916037435
R^2: 0.6681202934807451
MAE: 3.182668828293336
