In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
from sklearn.datasets import fetch_california_housing
import warnings

warnings.filterwarnings("ignore")

# Load dataset
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['PRICE'] = data.target

# Display dataset
print(df.head())

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  PRICE  
0    -122.23  4.526  
1    -122.22  3.585  
2    -122.24  3.521  
3    -122.25  3.413  
4    -122.25  3.422  


In [4]:
# Define features and target
X = df.drop('PRICE', axis=1)
y = df['PRICE']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training set size: {X_train.shape}")
print(f"Testing set size: {X_test.shape}")

Training set size: (16512, 8)
Testing set size: (4128, 8)


In [5]:
# Initialize Elastic Net model
elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)

# Train the model
elastic_net.fit(X_train, y_train)

# Predict on test set
y_pred = elastic_net.predict(X_test)

In [6]:
# Evaluate performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

Mean Squared Error: 0.57
R-squared: 0.56


In [7]:
# Display feature coefficients
coefficients = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': elastic_net.coef_
}).sort_values(by='Coefficient', ascending=False)

print(coefficients)

      Feature  Coefficient
0      MedInc     0.386286
1    HouseAge     0.012987
4  Population     0.000008
2    AveRooms     0.000000
3   AveBedrms     0.000000
5    AveOccup    -0.003279
7   Longitude    -0.233727
6    Latitude    -0.240098


In [8]:
from sklearn.model_selection import GridSearchCV

# Define parameter grid
param_grid = {
    'alpha': [0.1, 1.0, 10.0],
    'l1_ratio': [0.1, 0.5, 0.9]
}

# Grid search
elastic_net_cv = GridSearchCV(ElasticNet(random_state=42), param_grid, cv=5, scoring='r2')
elastic_net_cv.fit(X_train, y_train)

# Best parameters
print(f"Best alpha: {elastic_net_cv.best_params_['alpha']}")
print(f"Best l1_ratio: {elastic_net_cv.best_params_['l1_ratio']}")

Best alpha: 0.1
Best l1_ratio: 0.1
