In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import loguniform
from scipy.stats import uniform

from sklearn.datasets import fetch_california_housing
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import validation_curve
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [None]:
# Load the California housing data as pandas objects and unpack it directly
# into the feature table and the regression target.
features, labels = fetch_california_housing(return_X_y=True, as_frame=True)

In [None]:
# Hold out 30% of the rows for final evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Tune an SGD regressor. Scaling lives inside the pipeline, so the scaler is
# re-fitted on the training portion of every cross-validation fold.
pipeline = Pipeline(steps=[
    ("feature_scaling", StandardScaler()),
    ("sgd", SGDRegressor(random_state=1)),
])

# "<step>__<param>" keys route each candidate value to the "sgd" step.
param_grid = dict(
    sgd__loss=['squared_error', 'huber'],
    sgd__penalty=['l1', 'l2'],
    sgd__alpha=[0.1, 0.01, 0.001],
    sgd__max_iter=[1000, 2000, 5000],
)

# Exhaustive search with cv=4, scored by negated MSE (higher is better),
# using all available cores.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=4,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Score the selected pipeline once on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test Mean Squared Error: {test_mse}")

Best Parameters: {'sgd__alpha': 0.01, 'sgd__loss': 'squared_error', 'sgd__max_iter': 1000, 'sgd__penalty': 'l1'}
Test Mean Squared Error: 0.5323098267577917


In [None]:
# Tune the regularisation strength of a ridge regression fitted on
# standardised features.
pipeline = Pipeline(steps=[
    ("feature_scaling", StandardScaler()),
    ("ridge", Ridge()),
])

# Only the penalty strength of the "ridge" step is searched.
param_grid = dict(ridge__alpha=[0.5, 0.1, 0.05, 0.01, 0.005, 0.001])

# Exhaustive search with cv=4, scored by negated MSE (higher is better).
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=4,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Score the selected pipeline once on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test Mean Squared Error: {test_mse}")

Best Parameters: {'ridge__alpha': 0.5}
Test Mean Squared Error: 0.5296265707544732


In [None]:
# Re-partition the data with a 40% hold-out. This rebinds X_train / X_test /
# y_train / y_test, so any cell run after this one sees the new split.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.4,
    random_state=1,
)

In [None]:
# Tune the regularisation strength of a lasso regression fitted on
# standardised features.
pipeline = Pipeline(steps=[
    ("feature_scaling", StandardScaler()),
    ("lasso", Lasso()),
])

# Only the penalty strength of the "lasso" step is searched.
param_grid = dict(lasso__alpha=[0.5, 0.1, 0.05, 0.01, 0.005, 0.001])

# Exhaustive search with cv=6, scored by negated MSE (higher is better).
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=6,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Score the selected pipeline once on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test Mean Squared Error: {test_mse}")

Best Parameters: {'lasso__alpha': 0.001}
Test Mean Squared Error: 0.5215888844944333
