In [1]:
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

# 导入数据
x_all = pd.read_csv('data/random_data_regression_X.csv', header=None)
y_all = pd.read_csv('data/random_data_regression_y.csv', header=None)
train_x = x_all[:40]
train_y = y_all[:40]
test_x = x_all[40:]
test_y = y_all[40:]

In [2]:
# 封装梯度下降法(包含正则化项)
class GradientDescentLinerRegressionWithRegularization:
    def __init__(self, learning_rate=0.01, _lambda=0.1, min_grad=0.001, max_item=5000, seed=None):
        np.random.seed(seed)
        self._lambda = _lambda
        self.lr = learning_rate  # 设置学习率
        self.loss_arr = []
        self.intercept_ = None
        self.coef_ = None
        self.min_grad = min_grad
        self.max_item = max_item

    def fit(self, x, y):
        self.x = x
        self.x_b = np.hstack([np.ones((len(x), 1)), x])
        self.x_b_dim = np.size(self.x_b, 1)
        self.x_sample = np.size(self.x_b, 0)
        self.y = y
        self.w = np.random.normal(1, 0.001, (self.x_b_dim, 1))

        for i in range(self.max_item):
            d_w = self._train_step()
            self.loss_arr.append(self.loss())
            print("iter: {}, grad: {:.5f}, loss_in_train: {:.5f}, loss_in_test: {:.5f}".format(i + 1, np.average(d_w),
                                                                                               self.loss(is_test=False),
                                                                                               self.loss_arr[-1]))
            i += 1

            if np.average(d_w) < self.min_grad:
                break

    def _f(self, x, w):
        return x.dot(w)

    def predict(self, x=None):
        if x is None:
            x = self.x_b
        x = np.hstack([np.ones((len(x), 1)), x])
        y_pred = self._f(x, self.w)
        return y_pred

    def loss(self, is_test=True, y_true=None, y_pred=None):
        if y_true is None or y_pred is None:
            y_true = self.y
        if is_test:
            return np.sqrt(mean_squared_error(test_y, self.predict(test_x)))
        else:
            return np.sqrt(mean_squared_error(y_true, self.predict(self.x)))

    def _calc_gradient(self):
        d_w = np.empty(self.x_b_dim).reshape(-1, 1)
        d_w[0] = np.sum(self.x_b.dot(self.w) - self.y)
        for i in range(1, self.x_b_dim):
            d_w[i] = np.squeeze((self.x_b.dot(self.w) - self.y)).dot(self.x_b[:, i].T)
        return d_w * 2 / self.x_sample + (self._lambda / self.x_sample * self.w)

    def _train_step(self):
        d_w = self._calc_gradient()
        self.w = self.w - self.lr * d_w
        self.intercept_ = self.w[0]
        self.coef_ = self.w[1:]
        return self.w

In [3]:
reg = GradientDescentLinerRegressionWithRegularization(learning_rate=1e-3, _lambda=100, min_grad=0.1, max_item=5000, seed=1024)
reg.fit(train_x, train_y)

iter: 1, grad: 0.99524, loss_in_train: 30.00431, loss_in_test: 35.12043
iter: 2, grad: 0.99052, loss_in_train: 29.21537, loss_in_test: 35.16077
iter: 3, grad: 0.98587, loss_in_train: 28.44995, loss_in_test: 35.20093
iter: 4, grad: 0.98129, loss_in_train: 27.70729, loss_in_test: 35.24087
iter: 5, grad: 0.97679, loss_in_train: 26.98667, loss_in_test: 35.28055
iter: 6, grad: 0.97235, loss_in_train: 26.28739, loss_in_test: 35.31994
iter: 7, grad: 0.96799, loss_in_train: 25.60878, loss_in_test: 35.35899
iter: 8, grad: 0.96369, loss_in_train: 24.95019, loss_in_test: 35.39768
iter: 9, grad: 0.95945, loss_in_train: 24.31099, loss_in_test: 35.43598
iter: 10, grad: 0.95527, loss_in_train: 23.69056, loss_in_test: 35.47386
iter: 11, grad: 0.95116, loss_in_train: 23.08833, loss_in_test: 35.51129
iter: 12, grad: 0.94710, loss_in_train: 22.50373, loss_in_test: 35.54827
iter: 13, grad: 0.94310, loss_in_train: 21.93619, loss_in_test: 35.58476
iter: 14, grad: 0.93916, loss_in_train: 21.38520, loss_in_te

In [4]:
reg = GradientDescentLinerRegressionWithRegularization(learning_rate=1e-3, _lambda=100, min_grad=0.01, max_item=5000, seed=1024)
reg.fit(train_x, train_y)

iter: 1, grad: 0.99524, loss_in_train: 30.00431, loss_in_test: 35.12043
iter: 2, grad: 0.99052, loss_in_train: 29.21537, loss_in_test: 35.16077
iter: 3, grad: 0.98587, loss_in_train: 28.44995, loss_in_test: 35.20093
iter: 4, grad: 0.98129, loss_in_train: 27.70729, loss_in_test: 35.24087
iter: 5, grad: 0.97679, loss_in_train: 26.98667, loss_in_test: 35.28055
iter: 6, grad: 0.97235, loss_in_train: 26.28739, loss_in_test: 35.31994
iter: 7, grad: 0.96799, loss_in_train: 25.60878, loss_in_test: 35.35899
iter: 8, grad: 0.96369, loss_in_train: 24.95019, loss_in_test: 35.39768
iter: 9, grad: 0.95945, loss_in_train: 24.31099, loss_in_test: 35.43598
iter: 10, grad: 0.95527, loss_in_train: 23.69056, loss_in_test: 35.47386
iter: 11, grad: 0.95116, loss_in_train: 23.08833, loss_in_test: 35.51129
iter: 12, grad: 0.94710, loss_in_train: 22.50373, loss_in_test: 35.54827
iter: 13, grad: 0.94310, loss_in_train: 21.93619, loss_in_test: 35.58476
iter: 14, grad: 0.93916, loss_in_train: 21.38520, loss_in_te

KeyboardInterrupt: 