In [1]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.metrics import r2_score, mean_squared_error

# Experiment settings, named once so the split and the penalty strength are
# easy to find and tune, and so the printed labels below cannot drift from the
# values actually used.
TEST_SIZE = 0.2
RANDOM_STATE = 42
LASSO_ALPHA = 1.0
N_COEFS_SHOWN = 5


def fit_and_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on both splits.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place as a side effect.
    X_train, X_test : feature matrices for the training / test split.
    y_train, y_test : targets matching ``X_train`` / ``X_test``.

    Returns
    -------
    tuple
        ``(train_pred, test_pred, train_r2, test_r2, test_mse)`` where the
        R² values come from ``r2_score`` and the MSE from
        ``mean_squared_error`` on the test split.
    """
    model.fit(X_train, y_train)
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)
    return (
        train_pred,
        test_pred,
        r2_score(y_train, train_pred),
        r2_score(y_test, test_pred),
        mean_squared_error(y_test, test_pred),
    )


def print_scores(title, train_r2, test_r2, test_mse):
    """Print ``title`` followed by the three metrics, each to 4 decimals."""
    print(title)
    print(f"Training R²: {train_r2:.4f}")
    print(f"Test R²: {test_r2:.4f}")
    print(f"Test MSE: {test_mse:.4f}")


# Load the diabetes dataset
data = load_diabetes()
X, y = data.data, data.target

# Split into training and test sets (seeded so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Ordinary Least Squares (OLS) - No regularization
ols = LinearRegression()
(
    ols_train_pred,
    ols_test_pred,
    ols_train_r2,
    ols_test_r2,
    ols_test_mse,
) = fit_and_score(ols, X_train, X_test, y_train, y_test)

# Lasso Regression (penalty strength set by LASSO_ALPHA)
lasso = Lasso(alpha=LASSO_ALPHA)
(
    lasso_train_pred,
    lasso_test_pred,
    lasso_train_r2,
    lasso_test_r2,
    lasso_test_mse,
) = fit_and_score(lasso, X_train, X_test, y_train, y_test)

# Print results
print_scores("OLS (No Regularization):", ols_train_r2, ols_test_r2, ols_test_mse)
print()  # blank line separating the two reports

print_scores(
    f"Lasso Regression (alpha = {LASSO_ALPHA}):",
    lasso_train_r2,
    lasso_test_r2,
    lasso_test_mse,
)

# Compare coefficients side by side for the leading features
print(f"\nSample Coefficients (First {N_COEFS_SHOWN} Features):")
print(f"OLS: {ols.coef_[:N_COEFS_SHOWN]}")
print(f"Lasso: {lasso.coef_[:N_COEFS_SHOWN]}")

OLS (No Regularization):
Training R²: 0.5279
Test R²: 0.4526
Test MSE: 2900.1936

Lasso Regression (alpha = 1.0):
Training R²: 0.3646
Test R²: 0.3576
Test MSE: 3403.5757

Sample Coefficients (First 5 Features):
OLS: [  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588]
Lasso: [  0.          -0.         413.43184792  34.83051518   0.        ]
