In [1]:
import pandas as pd 
import numpy as np 
from sklearn.datasets import fetch_california_housing

In [6]:
# preparing data for model training
housing = fetch_california_housing(as_frame=True)
df = housing['data'].copy()
target = housing['target']

In [22]:
# preparing data for model training
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDRegressor



# processing pipeline
preprocess = Pipeline([
    ('scaler', StandardScaler()),
])

scaled_data = preprocess.fit_transform(df)

# splitting dataset
X_train, X_test, y_train, y_test = train_test_split(scaled_data, target, test_size=0.3, random_state=1)


# model initialization
model = SGDRegressor(random_state=1)


# hyperparameter tuning
params = {
    'loss' : ['squared_error', 'huber'],
    'penalty' : ['l1', 'l2'],
    'alpha' : [0.1, 0.01, 0.001],
    'max_iter' : [1000, 2000, 5000]
}

cross_validator = GridSearchCV(model, params, cv=4, return_train_score=True)
cross_validator.fit(X_train, y_train)


In [23]:
cross_validator.best_estimator_

In [24]:
round(cross_validator.score(X_test, y_test), 4)

0.557

In [25]:
cross_validator.best_params_

{'alpha': 0.001, 'loss': 'huber', 'max_iter': 1000, 'penalty': 'l2'}

In [26]:
# preparing data for model training
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge


# splitting dataset
X_train, X_test, y_train, y_test = train_test_split(scaled_data, target, test_size=0.3, random_state=1)

# processing pipeline
preprocess = Pipeline([
    ('scaler', StandardScaler()),
])

# model initialization
model = Ridge()


# hyperparameter tuning
params = {
    'alpha' : [0.5, 0.1, 0.05, 0.01, 0.005, 0.001]
}

cross_validator = GridSearchCV(model, params, cv=4, return_train_score=True)
cross_validator.fit(X_train, y_train)


In [27]:
round(cross_validator.score(X_test, y_test), 4)

0.5971

In [28]:
cross_validator.best_params_

{'alpha': 0.5}

In [33]:
# preparing data for model training
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Lasso


# splitting dataset
X_train, X_test, y_train, y_test = train_test_split(scaled_data, target, test_size=0.3, random_state=1)


# model initialization
model = Lasso()

# hyperparameter tuning
params = {
    'alpha' : [0.5, 0.1, 0.05, 0.01, 0.005, 0.001]
}

cross_validator = GridSearchCV(model, params, cv=6, return_train_score=True)
cross_validator.fit(X_train, y_train)


In [34]:
round(cross_validator.score(X_test, y_test),4)

0.5971

In [35]:
cross_validator.best_params_

{'alpha': 0.001}