## 随机梯度下降

In [1]:
import scipy.sparse as sp
import numpy as np

定义一个凸目标函数：
$$
\min_{x} \frac{1}{2} ||Ax-b||^{2}
$$

定义一个读取数据的类`Fun`：它读取参数$A$和$b$，并提供方法`min_f`。注意`min_f`返回的是残差范数$||Ax-b||_{2}$（与目标函数值$\frac{1}{2}||Ax-b||^{2}$单调对应），用于观察收敛情况：

In [2]:
class Fun(object):
    """Least-squares problem data (A, b) loaded from two ``.npz`` files.

    Attributes set by ``__init__``:
        file_a_path, file_b_path: the paths passed to the constructor.
        A, b: sparse operands loaded with ``scipy.sparse.load_npz`` and
            converted to CSR.
        A_T: transpose of ``A``.
        x_init: CSR starting point of shape ``(A.shape[-1], b.shape[-1])``
            with ones on the main diagonal (for a single-column ``b`` this is
            the first unit vector).
    """

    def __init__(self, path_a, path_b):
        """Load ``A`` and ``b`` from ``path_a`` / ``path_b`` and build ``x_init``."""
        self.file_a_path = path_a
        self.file_b_path = path_b
        self.A, self.b = self._get_parameter()
        self.A_T = self.A.transpose()
        # Initial solution: one column per column of b, one row per column of A.
        n_features = self.A.shape[-1]
        n_targets = self.b.shape[-1]
        self.x_init = sp.eye(n_features, n_targets, format="csr")

    def min_f(self, x):
        """Return the 2-norm of the residual ``A @ x - b``.

        Note that this is the residual norm itself, not ``0.5 * norm ** 2``.

        Args:
            x: candidate solution with ``A.shape[-1]`` rows; sparse or dense.

        Returns:
            ``numpy.linalg.norm(residual, ord=2)`` of the densified residual.
        """
        # ``@`` is the matrix product for both the legacy sparse-matrix API and
        # the newer sparse-array API; ``*`` is element-wise for sparse arrays.
        residual = self.A @ x - self.b
        # A dense ``x`` makes the difference dense, which has no ``toarray``.
        if sp.issparse(residual):
            residual = residual.toarray()
        return np.linalg.norm(np.asarray(residual), ord=2)

    def _get_parameter(self):
        """Read ``A`` and ``b`` from disk, store them on ``self`` and return them."""
        # Shapes noted by the original author for the news20 files:
        # A is (15935, 62061) and b is (15935, 1).
        self.A = sp.load_npz(self.file_a_path).tocsr()
        self.b = sp.load_npz(self.file_b_path).tocsr()
        return self.A, self.b

随机梯度下降的迭代公式：

$$
x_{k+1} = x_{k} - \eta \left((a_{i} x_{k} - b_{i})\, a_{i}\right)^{T}
$$

其中$a_{i}$为$A$的第$i$行。

因为$A$的维度为`(15935, 62061)`，$b$的维度为`(15935, 1)`，所以$x$和$x_{k}$的维度均为`(62061, 1)`。$a_{i}$的维度为`(1, 62061)`，$a_{i} x_{k} - b_{i}$的维度为`(1, 1)`。$(a_{i} x_{k} - b_{i})\, a_{i}$的维度为`(1, 62061)`，其转置之后，能够与$x_{k}$对齐。

In [3]:
class StochasticGradientDescent(Fun):
    """Single-row stochastic gradient descent on the data loaded by ``Fun``."""

    def __init__(self, path_a, path_b):
        """Load ``A`` and ``b`` through the parent constructor."""
        super(StochasticGradientDescent, self).__init__(path_a, path_b)

    def stochastic_gradient_decs(self, eta=0.002, iter_times=1, x_input=None):
        """Run ``iter_times`` SGD steps and return the final iterate.

        Each step draws one row index ``i`` with ``np.random.randint`` and
        applies ``x <- x - eta * ((a_i x - b_i) a_i)^T``. After every step the
        value of ``self.min_f`` before and after the update is printed.

        Args:
            eta: step size.
            iter_times: number of update steps.
            x_input: starting point; ``None`` (the default) uses ``self.x_init``.

        Returns:
            The iterate after the last step, or the starting point when
            ``iter_times`` is 0.
        """
        # The declared default used to crash inside ``min_f``; fall back to the
        # initial solution prepared by the parent class instead.
        x_current = self.x_init if x_input is None else x_input

        n_rows = self.A.shape[0]
        previous_value = self.min_f(x_current)
        for _ in range(iter_times):
            row = np.random.randint(0, n_rows)
            # Slicing keeps a_i and b_i two-dimensional for every sparse type.
            a_i = self.A[row:row + 1]
            b_i = self.b[row:row + 1]
            # ``@`` is a matrix product for sparse matrices and sparse arrays.
            row_residual = a_i @ x_current - b_i
            x_current = x_current - eta * (row_residual @ a_i).transpose()
            current_value = self.min_f(x_current)
            print("pre_y is {}  and y is {}".format(previous_value, current_value))
            previous_value = current_value

        return x_current

In [4]:
if __name__ == "__main__":

    # Seed NumPy's global generator so the randomly drawn rows, and therefore
    # the printed trace, are the same on every fresh run of the notebook.
    np.random.seed(0)

    StochasticGD = StochasticGradientDescent(path_a='./news20_A.npz', path_b='./news20_b.npz')

    # Keep the final iterate instead of discarding it so it can be inspected.
    x_final = StochasticGD.stochastic_gradient_decs(eta=0.002, iter_times=20, x_input=StochasticGD.x_init)

    print('')

pre_y is 15.722300259397239  and y is 15.72229919282442
pre_y is 15.72229919282442  and y is 15.721925969968366
pre_y is 15.721925969968366  and y is 15.721258121764754
pre_y is 15.721258121764754  and y is 15.721258124900542
pre_y is 15.721258124900542  and y is 15.719601452616311
pre_y is 15.719601452616311  and y is 15.719107249592593
pre_y is 15.719107249592593  and y is 15.71883839262038
pre_y is 15.71883839262038  and y is 15.71767159781624
pre_y is 15.71767159781624  and y is 15.716353277861108
pre_y is 15.716353277861108  and y is 15.716353285199698
pre_y is 15.716353285199698  and y is 15.716382860315008
pre_y is 15.716382860315008  and y is 15.716410372352442
pre_y is 15.716410372352442  and y is 15.716556898103844
pre_y is 15.716556898103844  and y is 15.715872518165037
pre_y is 15.715872518165037  and y is 15.71599996536192
pre_y is 15.71599996536192  and y is 15.716012153415326
pre_y is 15.716012153415326  and y is 15.71618139681003
pre_y is 15.71618139681003  and y is 15.