In [1]:
# Motivation (author's note): ordinary linear regression is very sensitive to
# outliers, and outliers show up frequently in real-world data, where they
# tend to degrade the fit considerably.
# Ridge regression is used in this notebook to dampen that effect.

# Load the raw data: one text line per sample, kept unparsed (with newlines).

with open('data_multivar.txt', 'r') as f:
    lines = list(f)

In [2]:
# Baseline: parse the data, then build and evaluate a plain linear regressor

from sklearn import linear_model
import numpy as np

# Parse every raw text line into a feature row and a target value.
# Each line holds comma-separated numbers: all but the last are features,
# the last one is the target.
X = []
y = []

for line in lines:
    # Skip blank / whitespace-only lines (e.g. a trailing newline at the end
    # of the file); float('') would otherwise raise ValueError.
    if not line.strip():
        continue
    data = [float(i) for i in line.split(',')]
    X.append(data[:-1])
    y.append(data[-1])

# Positional 80/20 split: the first 80% of the samples (in file order, no
# shuffling) are used for training, the remainder for testing.
num_training = int(0.8 * len(X))
num_test = len(X) - num_training

# Convert each slice of the Python lists into its own NumPy array.
X_train, X_test = (np.array(part) for part in (X[:num_training], X[num_training:]))
y_train, y_test = (np.array(part) for part in (y[:num_training], y[num_training:]))

# Fit an ordinary least-squares model on the training split (fit() returns the
# estimator itself, so construction and fitting can be chained), then predict
# the held-out test samples.
linregr = linear_model.LinearRegression().fit(X_train, y_train)
y_test_pred = linregr.predict(X_test)

import sklearn.metrics as sm

# Report the test-set metrics of the linear model, each rounded to 2 decimals.
print("Linear")
for label, metric in (
    ("Mean absolute error =", sm.mean_absolute_error),
    ("Mean squared error =", sm.mean_squared_error),
    ("Median absolute error =", sm.median_absolute_error),
    ("Explained variance score =", sm.explained_variance_score),
    ("R2 score =", sm.r2_score),
):
    print(label, round(metric(y_test, y_test_pred), 2))

Mean absolute error = 3.95
Mean squared error = 23.15
Median absolute error = 3.69
Explained variance score = 0.84
R2 score = 0.83


In [3]:
# Create the ridge regressor, fit it on the same training split and predict
# the same test samples as the linear model.
# NOTE(review): the metrics recorded for this model are identical (to two
# decimals) to those of the plain linear model — presumably because
# alpha=0.01 is a very weak penalty; confirm by trying larger alpha values.
ridge_regressor = linear_model.Ridge(
    alpha=0.01,
    fit_intercept=True,
    max_iter=10000,
).fit(X_train, y_train)

y_test_pred_ridge = ridge_regressor.predict(X_test)

# Report the test-set metrics of the ridge model, each rounded to 2 decimals.
print("Ridge")
for label, metric in (
    ("Mean absolute error =", sm.mean_absolute_error),
    ("Mean squared error =", sm.mean_squared_error),
    ("Median absolute error =", sm.median_absolute_error),
    ("Explained variance score =", sm.explained_variance_score),
    ("R2 score =", sm.r2_score),
):
    print(label, round(metric(y_test, y_test_pred_ridge), 2))

Mean absolute error = 3.95
Mean squared error = 23.15
Median absolute error = 3.69
Explained variance score = 0.84
R2 score = 0.83
