In [None]:
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
# California housing data: features come back as a DataFrame, the target as a Series.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Sanity check: shapes of the full features/target, then of the training portion.
print(*(part.shape for part in (X, y, X_train, y_train)))

(20640, 8) (20640,) (14448, 8) (14448,)


* Import `LinearRegression`
* Fit the model on the training split created earlier
* Predict on the test split and calculate the score

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the housing training split; fit() returns the
# estimator itself, so construction and fitting can be chained.
lin_reg = LinearRegression().fit(X_train, y_train)
y_predict = lin_reg.predict(X_test)

# Both score() and r2_score report R^2, so test_score and r2score print the
# same number; the second is a cross-check of the first.
train_score = lin_reg.score(X_train, y_train)
test_score = lin_reg.score(X_test, y_test)
r2score = r2_score(y_test, y_predict)

for metric_label, metric_value in (
    ('train_score: ', train_score),
    ('test_score: ', test_score),
    ('r2_score: ', r2score),
):
    print(metric_label, metric_value)

# Fitted intercept, printed without a label.
print(lin_reg.intercept_)

train_score:  0.609345972797216
test_score:  0.595770232606166
r2_score:  0.595770232606166
-37.05624133152533


* Load the diabetes dataset and split it with test size = 0.4

In [None]:
# Diabetes regression data as a (features DataFrame, target Series) pair.
X_diabetic, y_diabetic = load_diabetes(return_X_y=True, as_frame=True)

# 40% of the rows are held out for testing; the seed fixes the split.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X_diabetic,
    y_diabetic,
    test_size=0.4,
    random_state=42,
)

* Import the `LinearRegression` model with intercept
* Fit the data
* Test and calculate the score with intercept

In [None]:
from sklearn.linear_model import LinearRegression

# fit_intercept=True is spelled out on purpose, to make the intercept choice
# explicit for this experiment.
lin_reg = LinearRegression(fit_intercept=True).fit(X_train1, y_train1)
y_predict1 = lin_reg.predict(X_test1)

# R^2 on both splits, plus the same test R^2 recomputed from the predictions.
train_score = lin_reg.score(X_train1, y_train1)
test_score = lin_reg.score(X_test1, y_test1)
r2score = r2_score(y_test1, y_predict1)

for metric_label, metric_value in (
    ('train_score: ', train_score),
    ('test_score: ', test_score),
    ('r2_score: ', r2score),
    ('intercept: ', lin_reg.intercept_),
    ('coeffients : ', lin_reg.coef_),
):
    print(metric_label, metric_value)

train_score:  0.5072199968937584
test_score:  0.5157444756897698
r2_score:  0.5157444756897698
intercept:  148.92850899668235
coeffients :  [  18.08383103 -227.04654841  592.2754776   361.54657801 -655.89624143
  353.71022539   14.40233952  142.86622578  594.01401521   31.67348554]


* Fit the same model without an intercept (`fit_intercept=False`) and compare the test score

In [None]:
from sklearn.linear_model import LinearRegression

# Same data as the intercept experiment, but the model is fitted with no
# intercept term (intercept_ is reported as 0.0).
lin_reg = LinearRegression(fit_intercept=False).fit(X_train1, y_train1)
y_predict1 = lin_reg.predict(X_test1)

# train_score and r2score are still computed here, although only the test
# score is printed below.
train_score = lin_reg.score(X_train1, y_train1)
test_score = lin_reg.score(X_test1, y_test1)
r2score = r2_score(y_test1, y_predict1)

print('test_score: ', test_score)
print('intercept: ', lin_reg.intercept_)
print('coeffients : ', lin_reg.coef_)

test_score:  -3.7861093338014173
intercept:  0.0
coeffients :  [  21.4116396  -251.46942516  805.83148774  287.96985345 -356.12098013
   19.57062432  115.66733098  210.7775154   508.31873399  141.85938945]


* Load the diabetes dataset
* Split with test size = 0.4
* Fit with `SGDRegressor` and calculate the score

In [None]:
# Reload and re-split the diabetes data so this cell can be run on its own.
X_diabetic, y_diabetic = load_diabetes(as_frame=True, return_X_y=True)
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X_diabetic, y_diabetic, test_size=0.4, random_state=42
)

from sklearn.linear_model import SGDRegressor

# SGDRegressor is stochastic (it shuffles the training data by default), so an
# unseeded model gives a different score on every run. Pinning random_state
# makes the reported score reproducible under Restart & Run All; the exact
# value depends on this seed.
sgd = SGDRegressor(random_state=42)
sgd.fit(X_train1, y_train1)
y_predict1 = sgd.predict(X_test1)

# score() and r2_score both report R^2; only the test score is printed.
train_score = sgd.score(X_train1, y_train1)
test_score = sgd.score(X_test1, y_test1)
r2score = r2_score(y_test1, y_predict1)

print('test_score: ', test_score)

test_score:  0.40554663828474036


