In [1]:
from sklearn.datasets import load_boston
boston = load_boston()
print(boston["DESCR"])  # 无缺失值

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [2]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(boston["data"], boston["target"], test_size=0.25, random_state=33)

import numpy as np
print("The max target value is:", np.max(boston["target"]))
print("The min target value is:", np.min(boston["target"]))
print("The average target value is:", np.mean(boston["target"]))

The max target value is: 50.0
The min target value is: 5.0
The average target value is: 22.532806324110677


In [3]:
from sklearn.preprocessing import StandardScaler
ss_x = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

ss_y = StandardScaler()
y_train = ss_y.fit_transform(y_train.reshape(len(y_train), 1))
y_test = ss_y.transform(y_test.reshape(len(y_test), 1))

In [4]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_y_predict = lr.predict(x_test)

In [5]:
from sklearn.linear_model import SGDRegressor
sgdr = SGDRegressor()
sgdr.fit(x_train, y_train)
sgdr_y_predict = sgdr.predict(x_test)

  y = column_or_1d(y, warn=True)


In [7]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

print("The value of default measurement of LinearRegression is:", lr.score(x_test, y_test)) # 就是r2_score
print("The value of R-squared of LinearRegression is:", r2_score(y_test, lr_y_predict))
print("The value of squared error of LinearRegression is:", mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(lr_y_predict)))
print("The mean absolute error of LinearRegression is:", mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(lr_y_predict)))

print("------------------------------------------------------------------------------")

print("The value of default measurement of SGCRegression is:", sgdr.score(x_test, y_test))
print("The value of R-squared of SGCRegression is:", r2_score(y_test, sgdr_y_predict))
print("The value of squared error of SGCRegression is:", mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_y_predict)))
print("The mean absolute error of SGCRegression is:", mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_y_predict)))


The value of default measurement of LinearRegression is: 0.6757955014529481
The value of R-squared of LinearRegression is: 0.6757955014529481
The value of squared error of LinearRegression is: 25.13923652035345
The mean absolute error of LinearRegression is: 3.5325325437053983
------------------------------------------------------------------------------
The value of default measurement of SGCRegression is: 0.6546706377019071
The value of R-squared of SGCRegression is: 0.6546706377019071
The value of squared error of SGCRegression is: 26.77728580306749
The mean absolute error of SGCRegression is: 3.5182735717362585
