In [28]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import SGDRegressor, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [20]:
# Fetch the California housing dataset
dataset = fetch_california_housing()

# Hold out 30% of the samples for the final test score; the fixed seed keeps
# the split identical from run to run.
train, test, target_train, target_test = train_test_split(dataset.data, dataset.target, test_size=0.3, random_state=1)

# Standardize the features, then fit a linear model with stochastic gradient
# descent. SGDRegressor shuffles the training data between epochs, so it is
# seeded here: without a fixed random_state the cross-validation scores, the
# selected hyper-parameters and the reported test score differ on every run.
pipeline = Pipeline([
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('regressor', SGDRegressor(random_state=1))
])

# Hyper-parameters to search. The 'regressor__' prefix routes each entry to
# the pipeline step named 'regressor'.
param_grid = {
    'regressor__alpha': [0.1, 0.01, 0.001],
    'regressor__loss': ['squared_error', 'huber'],
    'regressor__penalty': ['l1', 'l2'],
    'regressor__max_iter': [1000, 2000, 5000]
}

# Exhaustive search with 4-fold cross-validation. The metric is spelled out
# for consistency with the Ridge and Lasso searches; R^2 is also what the
# regressor's default score method reports, so the selection rule is unchanged.
grid_search = GridSearchCV(pipeline, param_grid, cv=4, scoring='r2')

# Fit on the training split only; the test split stays untouched until scoring
grid_search.fit(train, target_train)

# Report the held-out score of the refit best estimator and the chosen settings
print("Test set score:", grid_search.score(test, target_test))
print("Best Alpha:", grid_search.best_params_['regressor__alpha'])
print("Best maximum iterations:", grid_search.best_params_['regressor__max_iter'])

Test set score: 0.5894461909174828
Best Alpha: 0.01
Best maximum iterations: 2000


In [26]:
# Load the California housing data (features in .data, target in .target)
dataset = fetch_california_housing()

# 70/30 train/test partition with a fixed seed
train, test, target_train, target_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.3,
    random_state=1,
)

# Two-stage estimator: feature standardization followed by ridge regression
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('regressor', Ridge()),
])

# Candidate settings for the 'regressor' step: regularization strength and
# whether an intercept term is fitted
param_grid = dict(
    regressor__alpha=[0.5, 0.1, 0.01, 0.005, 0.001],
    regressor__fit_intercept=[True, False],
)

# 4-fold cross-validated search, ranked by R^2
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=4,
    scoring='r2',
)

# Run the search on the training partition
grid_search.fit(train, target_train)

# Held-out R^2 of the best configuration, plus the alpha that was selected
print("Test set score:", grid_search.score(test, target_test))
print("Best Alpha:", grid_search.best_params_['regressor__alpha'])


Test set score: 0.597145061224877
Best Alpha: 0.5


In [29]:
# Load the California housing data (features in .data, target in .target)
dataset = fetch_california_housing()

# 70/30 train/test partition with a fixed seed
train, test, target_train, target_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.3,
    random_state=1,
)

# Two-stage estimator: feature standardization followed by lasso regression
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler(with_mean=True, with_std=True)),
    ('regressor', Lasso()),
])

# Candidate settings for the 'regressor' step: regularization strength and
# whether an intercept term is fitted
param_grid = dict(
    regressor__alpha=[0.5, 0.1, 0.01, 0.005, 0.001],
    regressor__fit_intercept=[True, False],
)

# 6-fold cross-validated search, ranked by R^2
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=6,
    scoring='r2',
)

# Run the search on the training partition
grid_search.fit(train, target_train)

# Held-out R^2 of the best configuration, plus the alpha that was selected
print("Test set score:", grid_search.score(test, target_test))
print("Best Alpha:", grid_search.best_params_['regressor__alpha'])

Test set score: 0.5971275080716549
Best Alpha: 0.001
