In [21]:
# imports
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [22]:
# data
housing = fetch_california_housing()
X, y = housing['data'], housing['target']

In [23]:
# split data train test
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.1,
    shuffle=True,
    random_state=13)

In [24]:
# pipeline
pipeline = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('lr', LinearRegression())
]
pipe = Pipeline(pipeline)

In [25]:
# fit
pipe.fit(X_train, y_train)

In [36]:
y_train_preds = pipe.predict(X_train)
y_train_preds[:10]

array([1.54505951, 2.21338527, 2.2636205 , 3.3258957 , 1.51710076,
       1.63209319, 2.9265211 , 0.78080924, 1.21968217, 0.72656239])

In [37]:
y_test_preds = pipe.predict(X_test)
y_test_preds[:10]

array([ 1.82212706,  1.98357668,  0.80547979, -0.19259114,  1.76072418,
        3.27855815,  2.12056804,  1.96099917,  2.38239663,  1.21005304])

In [32]:
r2_score(y_train, y_train_preds), r2_score(y_test, y_test_preds)

(0.6079874818809448, 0.5903435927516573)

In [33]:
mean_squared_error(y_train, y_train_preds), mean_squared_error(y_test, y_test_preds)

(0.5210784446797679, 0.5537420654727399)

In [34]:
mean_absolute_error(y_train, y_train_preds), mean_squared_error(y_test, y_test_preds)

(0.5300159371615256, 0.5537420654727399)