In [19]:
import sklearn as sk
import sklearn.datasets as skds
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
# import pipeline
from sklearn.pipeline import Pipeline
# import mean squared error (RMSE is derived from it)
from sklearn.metrics import mean_squared_error
# import explained variance, max error, mean squared error, mean absolute error
from sklearn.metrics import explained_variance_score, max_error, mean_squared_error, mean_absolute_error

In [20]:
# Load the California housing dataset.
dataset = skds.fetch_california_housing()

# Ordered 80/20 split: with shuffle=False the leading rows form the training set
# and the trailing rows form the test set. Features and target are split in the
# same call so they stay aligned by construction instead of relying on manual
# slicing of dataset.target. random_state is omitted because it has no effect
# when shuffle=False.
training, testing, target_training, target_testing = train_test_split(
    dataset.data, dataset.target, test_size=0.2, shuffle=False
)

# Baseline: ordinary least squares on the unscaled features.
model = LinearRegression()
model.fit(training, target_training)
# score() reports the coefficient of determination (R^2) on the held-out rows.
print(model.score(testing, target_testing))

0.6605140591532082


In [21]:
# If we use StandardScaler for preprocessing and LinearRegression for fitting the model, what is the root mean squared error value for predictions obtained using test set features?
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LinearRegression())
])
pipeline.fit(training, dataset.target[:len(training)])
# The `squared=False` argument was deprecated in scikit-learn 1.4 and removed in
# 1.6, so compute RMSE as the square root of the MSE; this works on all versions.
rmse = mean_squared_error(dataset.target[len(training):], pipeline.predict(testing)) ** 0.5
print(rmse)

0.7033383507521878


In [22]:
# calculate explained variance, max error, mean absolute error and mean squared
# error of the fitted pipeline on the test rows
# Evaluate once and reuse: the true targets are the rows of dataset.target that
# follow the training rows, and the predictions come from the fitted pipeline.
y_true = dataset.target[len(training):]
y_pred = pipeline.predict(testing)

variance = explained_variance_score(y_true, y_pred)
# Stored as `max_err` so the imported `max_error` function is not overwritten;
# rebinding it made this cell fail with a TypeError when run a second time.
max_err = max_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
mabse = mean_absolute_error(y_true, y_pred)
# Printed order: explained variance, max error, mean absolute error, MSE.
print(variance, max_err, mabse, mse)

0.6605500501742704 7.260453292958351 0.5168526993787043 0.49468483563880744


In [24]:
# Rebuild the pipeline with an SGD-based regressor behind the same scaler.
# The seed is fixed so the stochastic fit is repeatable.
sgd_steps = [
    ('scaler', StandardScaler()),
    ('model', SGDRegressor(random_state=0)),
]
pipeline = Pipeline(sgd_steps)
pipeline.fit(training, dataset.target[:len(training)])

# calculate bias: the fitted intercept of the 'model' step, unwrapped from its
# array by taking element 0
sgd_intercept = pipeline['model'].intercept_
bias = sgd_intercept[0]
print(bias)

2.0112392087532647


In [25]:
# What are the coefficients in predicted model?
# Read the learned weights from the pipeline's 'model' step.
fitted_regressor = pipeline['model']
coefficients = fitted_regressor.coef_
print(coefficients)

[ 0.84046697  0.112331   -0.41213039  0.21595971 -0.01781887 -0.01480892
 -0.87394103 -0.83913104]
