# The below project implements a Single and/or Multi-Linear Regressor on the dataset "fetch_california_housing" from Scikit.

In [61]:
# Load the required modules
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt

# Load the dataset and split it into train and test sets
dataset = fetch_california_housing()

X = dataset['data']
y = dataset['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print (X_train.shape, X_test.shape)

# Below 3 lines of code to be used in case of single linear regressor!!
# plt.scatter(X[..., 0-7], y)
# plt.xlabel('California Census Block Group')
# plt.ylabel('Median house value (in hundreds of thousands of dollars)')

# Model development & learning
model = LinearRegression()
model.fit(X_train, y_train)
y_predict = model.predict(X_test)

# Below 3 lines of code to be used in case of single linear regressor!!
# plt.scatter(X[..., 0-7], y)
# plt.plot(X_test, y_predict, color = 'red')
# plt.xlabel('California Census Block Group')
# plt.ylabel('Median house value (in hundreds of thousands of dollars)')

# Compute y = (wX + b)
w = model.coef_
b = model.intercept_
print(w, b)

# Validating y = (wX + b)
if y_predict.all() == (w*X_test + b).all():
  print("The model prediction is Correct")
else:
  print("Error in prediction. Please check the dataset.")
print(y_predict, w*X_test + b)

# R2 and MSE
r2_score = r2_score(y_test, y_predict)
mse_error = mean_squared_error(y_test, y_predict)
print(r2_score, mse_error)

(15480, 8) (5160, 8)
[ 4.39091042e-01  9.59864665e-03 -1.03311173e-01  6.16730152e-01
 -7.63275197e-06 -4.48838256e-03 -4.17353284e-01 -4.30614462e-01] -36.609593778714334
The model prediction is Correct
[2.27826207 2.79606672 1.90887117 ... 2.15678592 3.03507742 2.89088222] [[-34.78657559 -36.39842355 -37.19465247 ... -36.62835788 -50.20696377
   13.79382901]
 [-34.07182319 -36.30243709 -37.24053847 ... -36.62315309 -50.76621717
   14.18999431]
 [-34.70011856 -36.33123303 -37.22230255 ... -36.62265548 -52.7402982
   15.85647228]
 ...
 [-35.01586893 -36.45601543 -36.98325398 ... -36.61806029 -50.88307609
   14.46558757]
 [-34.18875314 -36.25444385 -37.08412476 ... -36.62309696 -50.7578701
   14.34932166]
 [-34.51780796 -36.2640425  -37.15319839 ... -36.62263457 -52.32711845
   16.11484096]]
0.5911695436410476 0.5404128061709095
