# 1. 定义评价指标

In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
import numpy as np


def MAE(y, y_pre):
    """Return the mean absolute error between targets and predictions.

    Args:
        y: Ground-truth values; any array-like accepted by ``np.asarray``
            (NumPy array, list, tuple, scalar).
        y_pre: Predicted values, broadcastable against ``y``.

    Returns:
        The mean of ``|y - y_pre|`` over all elements, as a NumPy float.
    """
    # Coerce to float arrays: plain lists do not support ``-``, and
    # unsigned-integer arrays would silently wrap around on subtraction.
    y = np.asarray(y, dtype=float)
    y_pre = np.asarray(y_pre, dtype=float)
    return np.mean(np.abs(y - y_pre))


def MSE(y, y_pre):
    """Return the mean squared error between targets and predictions.

    Args:
        y: Ground-truth values; any array-like accepted by ``np.asarray``
            (NumPy array, list, tuple, scalar).
        y_pre: Predicted values, broadcastable against ``y``.

    Returns:
        The mean of ``(y - y_pre) ** 2`` over all elements, as a NumPy float.
    """
    # Coerce to float arrays: plain lists do not support ``-``, and
    # small integer dtypes would overflow/wrap when differenced and squared.
    y = np.asarray(y, dtype=float)
    y_pre = np.asarray(y_pre, dtype=float)
    return np.mean((y - y_pre) ** 2)


def RMSE(y, y_pre):
    """Return the root mean squared error between targets and predictions.

    Args:
        y: Ground-truth values.
        y_pre: Predicted values.

    Returns:
        The square root of ``MSE(y, y_pre)``.
    """
    mean_sq_err = MSE(y, y_pre)
    return np.sqrt(mean_sq_err)


def MAPE(y, y_pre):
    """Return the mean absolute percentage error as a fraction (not x100).

    Args:
        y: Ground-truth values; any array-like accepted by ``np.asarray``.
        y_pre: Predicted values, broadcastable against ``y``.

    Returns:
        The mean of ``|y - y_pre| / max(|y|, eps)`` as a NumPy float, where
        ``eps`` is float64 machine epsilon.
    """
    y = np.asarray(y, dtype=float)
    y_pre = np.asarray(y_pre, dtype=float)
    # Clamp the denominator so zero targets give a finite (possibly very
    # large) value instead of inf/nan; this mirrors scikit-learn's
    # mean_absolute_percentage_error. For |y| >= eps the result is the same
    # as the plain |(y - y_pre) / y| formula.
    eps = np.finfo(np.float64).eps
    return np.mean(np.abs(y - y_pre) / np.maximum(np.abs(y), eps))


def R2(y, y_pre):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - u / v`` where ``u`` is the residual sum of squares and
    ``v`` is the total sum of squares of ``y`` around its mean.

    Args:
        y: Ground-truth values; any array-like accepted by ``np.asarray``.
        y_pre: Predicted values, broadcastable against ``y``.

    Returns:
        The R^2 score as a NumPy float. When ``y`` is constant (``v == 0``)
        the ratio is undefined; 1.0 is returned for a perfect fit and 0.0
        otherwise, matching scikit-learn's ``r2_score`` default. Empty input
        yields ``nan``.
    """
    y = np.asarray(y, dtype=float)
    y_pre = np.asarray(y_pre, dtype=float)
    if y.size == 0:
        # No data: the score is undefined.
        return np.float64(np.nan)
    u = np.sum((y - y_pre) ** 2)
    v = np.sum((y - np.mean(y)) ** 2)
    if v == 0:
        # Constant targets: avoid 0/0 (nan) or x/0 (-inf).
        return np.float64(1.0 if u == 0 else 0.0)
    return 1 - (u / v)

# 2. 载入数据

In [4]:
def load_data():
    """Fetch the California housing dataset.

    Returns:
        A ``(x, y)`` tuple: the dataset's ``data`` (features) and ``target``
        as returned by ``fetch_california_housing``.
    """
    # return_X_y=True hands back (data, target) directly instead of a Bunch.
    features, target = fetch_california_housing(return_X_y=True)
    return features, target

# Load the features and targets once at notebook level, then echo both
# arrays (NumPy abbreviates the printed output).
x, y = load_data()
print(x, y)

[[   8.3252       41.            6.98412698 ...    2.55555556
    37.88       -122.23      ]
 [   8.3014       21.            6.23813708 ...    2.10984183
    37.86       -122.22      ]
 [   7.2574       52.            8.28813559 ...    2.80225989
    37.85       -122.24      ]
 ...
 [   1.7          17.            5.20554273 ...    2.3256351
    39.43       -121.22      ]
 [   1.8672       18.            5.32951289 ...    2.12320917
    39.43       -121.32      ]
 [   2.3886       16.            5.25471698 ...    2.61698113
    39.37       -121.24      ]] [4.526 3.585 3.521 ... 0.923 0.847 0.894]


# 3. 训练模型和验证

In [5]:
def train(x, y):
    """Fit a ``LinearRegression`` on ``(x, y)`` and print its metrics.

    The model predicts on the same ``x`` it was fitted on, so every printed
    number is an in-sample (training) score, not a held-out estimate.

    Args:
        x: Feature matrix passed to ``LinearRegression.fit``.
        y: Target values.

    Returns:
        None. The results are written to stdout only.
    """
    regressor = LinearRegression().fit(x, y)
    fitted = regressor.predict(x)
    print("model score: ", regressor.score(x, y))

    # (label, metric) pairs, printed in this order.
    reports = (
        ("MAE: ", MAE),
        ("MSE: ", MSE),
        ("MAPE: ", MAPE),
        ("R^2: ", R2),
    )
    for label, metric in reports:
        print(label, metric(y, fitted))


# Entry point: reload the dataset, then fit the model and print its metrics.
if __name__ == "__main__":
    x, y = load_data()
    train(x, y)


model score:  0.606232685199805
MAE:  0.5311643817546476
MSE:  0.5243209861846072
MAPE:  0.31715404597233426
R^2:  0.606232685199805
