In [1]:
# Exercise 3: Regression

In [2]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

In [3]:
# Load the California housing dataset and pull out features / target
housing = fetch_california_housing()
X = housing['data']
y = housing['target']

# Hold out a fraction of the samples (test_size=0.1) as the test set;
# the fixed random_state makes the shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=13
)

# Pipeline steps, applied in order: median imputation -> standardization
# -> linear regression
pipeline = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('lr', LinearRegression()),
]
pipe = Pipeline(steps=pipeline)

# Fit every step on the training split only
pipe.fit(X_train, y_train)

# Predictions for the training split first, then the test split
y_train_pred, y_test_pred = (
    pipe.predict(features) for features in (X_train, X_test)
)

# Show the first ten predictions of each split
print("Train values predicted:\n", y_train_pred[:10])
print("Test values predicted:\n", y_test_pred[:10])

Train values predicted:
 [1.54505951 2.21338527 2.2636205  3.3258957  1.51710076 1.63209319
 2.9265211  0.78080924 1.21968217 0.72656239]
Test values predicted:
 [ 1.82212706  1.98357668  0.80547979 -0.19259114  1.76072418  3.27855815
  2.12056804  1.96099917  2.38239663  1.21005304]


In [4]:
# Score the predictions on both splits, grouped by metric.
# Each call compares the true targets with the matching predictions.

# R² score
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

# Mean squared error
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)

# Mean absolute error
mae_train = mean_absolute_error(y_train, y_train_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

# Report: one print call per split; sep="\n" puts each entry on its own line
print(
    "Train Set Metrics:",
    f"  R²:  {r2_train:.4f}",
    f"  MSE: {mse_train:.4f}",
    f"  MAE: {mae_train:.4f}",
    sep="\n",
)

# The leading "\n" keeps a blank line between the two reports
print(
    "\nTest Set Metrics:",
    f"  R²:  {r2_test:.4f}",
    f"  MSE: {mse_test:.4f}",
    f"  MAE: {mae_test:.4f}",
    sep="\n",
)

Train Set Metrics:
  R²:  0.6080
  MSE: 0.5211
  MAE: 0.5300

Test Set Metrics:
  R²:  0.5903
  MSE: 0.5537
  MAE: 0.5454
