In [3]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing # Changed from load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

In [5]:
# Load the California housing dataset.
# `fetch_california_housing` and `pd` are already provided by the notebook's
# import cell, so they are not re-imported here.
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Create a pandas DataFrame: one column per feature, plus the regression
# target stored under 'PRICE'.
# NOTE(review): the target is presumably the median house value in units of
# $100,000 (per the scikit-learn dataset description) — confirm.
df = pd.DataFrame(X, columns=housing.feature_names)
df['PRICE'] = y

# Display the first few rows of the DataFrame
print(df.head())

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  PRICE  
0    -122.23  4.526  
1    -122.22  3.585  
2    -122.24  3.521  
3    -122.25  3.413  
4    -122.25  3.422  


In [6]:
# Count missing entries per column (features and 'PRICE') before modelling.
df.isna().sum()

Unnamed: 0,0
MedInc,0
HouseAge,0
AveRooms,0
AveBedrms,0
Population,0
AveOccup,0
Latitude,0
Longitude,0
PRICE,0


In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Ridge Regression: fit on the standardized training features with a
# regularization strength of alpha=1.0.
ridge = Ridge(alpha=1.0)
ridge.fit(X=X_train_scaled, y=y_train)

In [11]:
# Lasso Regression: fit on the standardized training features with a
# regularization strength of alpha=0.1.
lasso = Lasso(alpha=0.1)
lasso.fit(X=X_train_scaled, y=y_train)

In [12]:
# Score the held-out (scaled) test features with each fitted model.
ridge_pred, lasso_pred = (
    model.predict(X_test_scaled) for model in (ridge, lasso)
)

In [13]:
# Test-set metrics for Ridge; the MSE is computed once and reused for RMSE.
ridge_mse = mean_squared_error(y_test, ridge_pred)

print("----- Ridge Regression -----")
print("MSE:", ridge_mse)
print("RMSE:", np.sqrt(ridge_mse))
print("R² Score:", r2_score(y_test, ridge_pred))

----- Ridge Regression -----
MSE: 0.5558548589435974
RMSE: 0.7455567442814782
R² Score: 0.575815742891368


In [15]:
# Test-set metrics for Lasso; the MSE is computed once and reused for RMSE.
lasso_mse = mean_squared_error(y_test, lasso_pred)

print("\n----- Lasso Regression -----")
print("MSE:", lasso_mse)
print("RMSE:", np.sqrt(lasso_mse))
print("R² Score:", r2_score(y_test, lasso_pred))


----- Lasso Regression -----
MSE: 0.6796290284328821
RMSE: 0.8243961598848469
R² Score: 0.4813611325029077


In [17]:
# Side-by-side table of the fitted coefficients, one row per feature, so the
# two models' weights can be compared directly.
coef_df = pd.DataFrame(
    dict(
        zip(
            ("Feature", "Ridge Coeff", "Lasso Coeff"),
            (housing.feature_names, ridge.coef_, lasso.coef_),
        )
    )
)
print(coef_df)

      Feature  Ridge Coeff  Lasso Coeff
0      MedInc     0.854327     0.710598
1    HouseAge     0.122624     0.106453
2    AveRooms    -0.294210    -0.000000
3   AveBedrms     0.339008     0.000000
4  Population    -0.002282    -0.000000
5    AveOccup    -0.040833    -0.000000
6    Latitude    -0.896168    -0.011469
7   Longitude    -0.869071    -0.000000
