In [27]:
import warnings

import sklearn

# Compare the installed scikit-learn release with the one this notebook names.
# Assigning to ``sklearn.__version__`` (as this cell used to do) only overwrites
# the reported string: it cannot change which release is installed, and it hides
# a genuine mismatch from every later check. Read the attribute instead.
EXPECTED_SKLEARN_VERSION = "1.2.2"
if sklearn.__version__ != EXPECTED_SKLEARN_VERSION:
    warnings.warn(
        f"This notebook names scikit-learn {EXPECTED_SKLEARN_VERSION} "
        f"but {sklearn.__version__} is installed; results may differ."
    )
print("scikit-learn version:", sklearn.__version__)

In [42]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, Lasso, Ridge
from sklearn.metrics import r2_score, mean_squared_error, explained_variance_score, max_error, mean_absolute_error
import numpy as np

In [29]:
# Load the California housing dataset.
# The returned object's .data (features) and .target (regression target)
# attributes are what the train/test split consumes.
data = fetch_california_housing()

In [30]:
# Hold out 30% of the samples for testing (70:30 split); the fixed seed keeps
# the partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.3,
    random_state=1,
)

In [31]:
# Standardize features: learn the centering/scaling statistics from the
# training split only, then apply that same fitted transformation to both
# splits so the test set never influences the scaler.
scaler = StandardScaler(with_mean=True, with_std=True)
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [32]:
# Define SGDRegressor model.
# random_state=1 fixes the seed so repeated fits are reproducible; the remaining
# hyperparameters are left at their defaults and tuned by the grid search.
sgd = SGDRegressor(random_state=1)

In [33]:
# Hyperparameter grid for the SGD regressor. Every combination of the values
# below is a candidate: 2 losses x 2 penalties x 3 alphas x 3 iteration caps.
param_grid = dict(
    loss=['squared_error', 'huber'],
    penalty=['l1', 'l2'],
    alpha=[0.1, 0.01, 0.001],
    max_iter=[1000, 2000, 5000],
)

In [34]:
# Exhaustive search over param_grid for the SGD regressor: each candidate is
# scored by R^2 under 4-fold cross-validation, with n_jobs=-1 requesting all
# available CPU cores.
grid_search = GridSearchCV(
    estimator=sgd,
    param_grid=param_grid,
    scoring='r2',
    cv=4,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

In [35]:
# Best model from grid search: the estimator GridSearchCV kept for the
# best-scoring hyperparameter combination.
best_model = grid_search.best_estimator_

In [36]:
# Predict on test set. The standardized test features are used because the
# model was fitted on standardized training features.
y_pred = best_model.predict(X_test_scaled)

In [37]:
# Score the held-out predictions with the coefficient of determination (R^2)
# and report it.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared value on test set:", r2)

R-squared value on test set: 0.5951040704728554


In [38]:
# Report the hyperparameter combination that won the grid search.
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'alpha': 0.01, 'loss': 'squared_error', 'max_iter': 1000, 'penalty': 'l1'}


In [39]:
# Print the best alpha value, i.e. the 'alpha' entry of the winning
# hyperparameter combination.
best_alpha = best_params['alpha']
print("Best Alpha Value:", best_alpha)

Best Alpha Value: 0.01


In [40]:
# Print the best max_iter value, i.e. the iteration cap in the winning
# hyperparameter combination.
best_max_iter = best_params['max_iter']
print("Best Maximum Number of Passes:", best_max_iter)


Best Maximum Number of Passes: 1000


In [43]:
# Define Ridge Regression model.
# Constructed with default settings; alpha and fit_intercept are set per
# candidate by the grid search.
ridge = Ridge()

In [44]:
# Hyperparameter grid for Ridge: 6 alpha values x 2 intercept settings.
# NOTE: this rebinds param_grid, replacing the grid used for the SGD search.
param_grid = dict(
    alpha=[0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    fit_intercept=[True, False],
)

In [45]:
# Exhaustive search over param_grid for Ridge: each candidate is scored by R^2
# under 4-fold cross-validation, with n_jobs=-1 requesting all available CPU cores.
# NOTE: this rebinds grid_search, discarding the previous search object.
grid_search = GridSearchCV(
    estimator=ridge,
    param_grid=param_grid,
    scoring='r2',
    cv=4,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)


In [46]:
# Best model from grid search: the Ridge estimator kept for the best-scoring
# hyperparameter combination.
best_model = grid_search.best_estimator_

In [47]:
# Predict on test set, using the standardized test features to match the
# standardized features the model was fitted on.
y_pred = best_model.predict(X_test_scaled)

In [48]:
# Score the held-out predictions with R^2 and report it rounded to four
# decimal places.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared value on test set:", round(r2, 4))

R-squared value on test set: 0.5971


In [49]:
# Pull out the winning hyperparameter combination and its alpha entry first,
# then report both (same print order as before).
best_params = grid_search.best_params_
best_alpha = best_params['alpha']

print("Best Hyperparameters:", best_params)
print("Best Alpha Value:", best_alpha)

Best Hyperparameters: {'alpha': 0.5, 'fit_intercept': True}
Best Alpha Value: 0.5


In [19]:
# Define Lasso model.
# Constructed with default settings; alpha and fit_intercept are set per
# candidate by the grid search.
# NOTE(review): this cell's execution count (19) is lower than those of the
# cells above it, so the Lasso section was last run before them. Restart the
# kernel and run all cells top to bottom to confirm the recorded outputs.
lasso = Lasso()

In [20]:
# Hyperparameter grid for Lasso: 6 alpha values x 2 intercept settings.
# NOTE: this rebinds param_grid.
param_grid = dict(
    alpha=[0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    fit_intercept=[True, False],
)

In [21]:
# Exhaustive search over param_grid for Lasso: each candidate is scored by R^2
# under 6-fold cross-validation, with n_jobs=-1 requesting all available CPU cores.
# NOTE: this rebinds grid_search.
grid_search = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
    scoring='r2',
    cv=6,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

In [22]:
# Best model from grid search: the Lasso estimator kept for the best-scoring
# hyperparameter combination.
best_model = grid_search.best_estimator_

In [23]:
# Predict on test set, using the standardized test features to match the
# standardized features the model was fitted on.
y_pred = best_model.predict(X_test_scaled)

In [24]:
# Score the held-out predictions with the coefficient of determination (R^2)
# and report the unrounded value.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared value on test set:", r2)

R-squared value on test set: 0.5971275080716549


In [25]:
# Print the best hyperparameters: the parameter combination that achieved the
# best cross-validated score in the grid search.
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)


Best Hyperparameters: {'alpha': 0.001, 'fit_intercept': True}


In [26]:
# Print the best alpha value, i.e. the 'alpha' entry of the winning
# hyperparameter combination.
best_alpha = best_params['alpha']
print("Best Alpha Value:", best_alpha)


Best Alpha Value: 0.001
