## 加速梯度下降

In [1]:
import scipy.sparse as sp
import numpy as np

定义一个凸目标函数：
$$
\min_{x} \frac{1}{2} ||Ax-b||^{2}
$$

定义一个读取数据的函数`Fun()`, 读取参数$A$和$b$, 以及计算目标函数值的`min_f`:

In [2]:
class Fun(object):
    def __init__(self, path_a, path_b):
        self.file_a_path = path_a
        self.file_b_path = path_b
        self.A, self.b = self._get_parameter()
        self.A_T = self.A.transpose()
        self.x_init = sp.eye(self.A.shape[-1], self.b.shape[-1]).tocsr()  # 设置初始解

    def min_f(self, x):
        y = self.A * x - self.b
        return np.linalg.norm(y.toarray(), ord=2)

    def _get_parameter(self):
        self.A = sp.load_npz(self.file_a_path).tocsr()  # shape = (15935, 62061)
        self.b = sp.load_npz(self.file_b_path).tocsr()  # shape = (15935, 1)
        return self.A, self.b

梯度下降的迭代公式：

$$
y_{k+1} = x_{x}+\frac{k-1}{k+2}(x_{k}-x_{k-1}) \\
x_{k+1} = y_{k+1} - \eta A^{T}(Ay_{k+1}-b)
$$

因为$A$的维度为`(15935, 62061)`，$b$的维度为`(15935, 1)`。所以$x$的维度为`(62061, 1)`，$y_{k+1}$的维度为`(62061, 1)`。$Ay_{k+1}-b$的纬度为`(15935, 1)`。$A^{T}$的维度为`(62061, 15935)`, $A^{T}(Ay_{k+1}-b)$的维度`(62061, 1)`能够与$x_{k}$对齐。


In [3]:
class AccelerateGradientDescent(Fun):
    def __init__(self, path_a, path_b):
        super(AccelerateGradientDescent, self).__init__(path_a, path_b)
        self.x_pre = sp.eye(self.A.shape[-1], self.b.shape[-1]).tocsr()  # 设置x_{k-1}
        self.k = None

    def acc_gradient_decs(self, eta=0.002, iter_times=1, x_input=None):
        self.k = 1
        x_output = None
        y_input = self.min_f(x_input)
        for i in range(iter_times):
            y_k_1 = x_input + (self.k - 1)/(self.k + 2) * (x_input - self.x_pre)
            x_output = y_k_1 - eta * self.A_T * (self.A * y_k_1 - self.b)
            self.x_pre = x_input
            x_input = x_output  # 更新 x 的值
            y_output = self.min_f(x_output)
            print("pre_y is {}  and y is {}".format(y_input, y_output))
            y_input = y_output
            self.k += 1
        return x_output

In [4]:
if __name__ == "__main__":

    AccGD = AccelerateGradientDescent(path_a='./news20_A.npz', path_b='./news20_b.npz')

    AccGD.acc_gradient_decs(eta=0.002, iter_times=20, x_input=AccGD.x_init)

    print('')

pre_y is 15.722300259397239  and y is 13.255437472728753
pre_y is 13.255437472728753  and y is 12.735302978524988
pre_y is 12.735302978524988  and y is 12.307633153911993
pre_y is 12.307633153911993  and y is 11.955413213701904
pre_y is 11.955413213701904  and y is 11.650605664213087
pre_y is 11.650605664213087  and y is 11.36805046757616
pre_y is 11.36805046757616  and y is 11.092384589510115
pre_y is 11.092384589510115  and y is 10.81782545703838
pre_y is 10.81782545703838  and y is 10.544467243141199
pre_y is 10.544467243141199  and y is 10.274496509332158
pre_y is 10.274496509332158  and y is 10.009923575915268
pre_y is 10.009923575915268  and y is 9.751882549700937
pre_y is 9.751882549700937  and y is 9.500825818496821
pre_y is 9.500825818496821  and y is 9.256927535312608
pre_y is 9.256927535312608  and y is 9.020336311075038
pre_y is 9.020336311075038  and y is 8.791231346844746
pre_y is 8.791231346844746  and y is 8.569781302129703
pre_y is 8.569781302129703  and y is 8.3561012