## 1.3.4 计算图

In [1]:
import numpy as np

### 1.3.4.3 Repeat节点

In [2]:
# D: number of columns in the input row; N: how many times that row is repeated.
D, N = 8, 7

In [3]:
x = np.random.randn(1, D)  # input: a single random row of shape (1, D)

In [4]:
y = np.repeat(x, N, axis=0)  # forward pass: copy the row N times along axis 0

In [5]:
y

array([[-0.17808531,  1.39669766, -0.70718715,  1.98144475,  0.88642414,
        -0.52805048, -1.53257264, -0.71737293],
       [-0.17808531,  1.39669766, -0.70718715,  1.98144475,  0.88642414,
        -0.52805048, -1.53257264, -0.71737293],
       [-0.17808531,  1.39669766, -0.70718715,  1.98144475,  0.88642414,
        -0.52805048, -1.53257264, -0.71737293],
       [-0.17808531,  1.39669766, -0.70718715,  1.98144475,  0.88642414,
        -0.52805048, -1.53257264, -0.71737293],
       [-0.17808531,  1.39669766, -0.70718715,  1.98144475,  0.88642414,
        -0.52805048, -1.53257264, -0.71737293],
       [-0.17808531,  1.39669766, -0.70718715,  1.98144475,  0.88642414,
        -0.52805048, -1.53257264, -0.71737293],
       [-0.17808531,  1.39669766, -0.70718715,  1.98144475,  0.88642414,
        -0.52805048, -1.53257264, -0.71737293]])

In [6]:
dy = np.random.randn(N, D)  # hypothetical upstream gradient (random stand-in)

In [7]:
dx = np.sum(dy, axis=0, keepdims=True)  # backward pass: sum over axis 0; keepdims=True keeps the result 2-D

In [8]:
dx

array([[ 2.37796747, -1.63240079,  1.41024062,  0.21443381, -2.24055111,
         1.61517225,  0.2525587 , -2.86137176]])

In [None]:
"""
这里通过 np.repeat() 方法进行元素的复制。上面的例子中将复制 N 次
数组 x。通过指定 axis，可以指定沿哪个轴复制。因为反向传播时要计算
总和，所以使用 NumPy 的 sum() 方法。此时，通过指定 axis 来指定对哪
个轴求和。另外，通过指定 keepdims=True，可以维持二维数组的维数。在
上面的例子中，当 keepdims=True 时，np.sum() 的结果的形状是 (1, D)；当
keepdims=False 时，形状是 (D,)。
"""

### 1.3.4.4 Sum节点

In [10]:
D, N = 8, 7
x = np.random.randn(N, D)  # input: random array of shape (N, D)

In [11]:
y = np.sum(x, axis=0, keepdims=True)  # forward pass: sum over axis 0; keepdims=True keeps the result 2-D

In [12]:
dy = np.random.randn(1, D)  # hypothetical upstream gradient (random stand-in)
dx = np.repeat(dy, N, axis=0)  # backward pass: copy the gradient row N times along axis 0

In [13]:
y

array([[ 1.65196777, -1.32061663, -1.13073388, -1.67569885,  3.15396898,
        -0.3089492 ,  1.2983359 ,  3.54507303]])

In [14]:
dx

array([[-1.58780172,  1.75302864, -0.31538166, -1.62901763, -2.20836159,
        -1.97943957, -0.51309289,  0.04613219],
       [-1.58780172,  1.75302864, -0.31538166, -1.62901763, -2.20836159,
        -1.97943957, -0.51309289,  0.04613219],
       [-1.58780172,  1.75302864, -0.31538166, -1.62901763, -2.20836159,
        -1.97943957, -0.51309289,  0.04613219],
       [-1.58780172,  1.75302864, -0.31538166, -1.62901763, -2.20836159,
        -1.97943957, -0.51309289,  0.04613219],
       [-1.58780172,  1.75302864, -0.31538166, -1.62901763, -2.20836159,
        -1.97943957, -0.51309289,  0.04613219],
       [-1.58780172,  1.75302864, -0.31538166, -1.62901763, -2.20836159,
        -1.97943957, -0.51309289,  0.04613219],
       [-1.58780172,  1.75302864, -0.31538166, -1.62901763, -2.20836159,
        -1.97943957, -0.51309289,  0.04613219]])

In [15]:
class MatMul:
    """Matrix-product node with a single weight parameter.

    ``forward`` returns ``np.dot(x, W)`` and remembers ``x``; ``backward``
    returns the gradient with respect to ``x`` and stores the gradient with
    respect to ``W`` in ``self.grads[0]``.
    """

    def __init__(self, W):
        """Keep the weight ``W`` and allocate a zero gradient buffer like it."""
        self.x = None  # input saved by forward() and read by backward()
        self.params = [W]
        self.grads = [np.zeros_like(W)]

    def forward(self, x):
        """Return ``np.dot(x, W)`` and cache ``x`` for the backward pass."""
        (weight,) = self.params
        product = np.dot(x, weight)
        self.x = x
        return product

    def backward(self, dout):
        """Propagate ``dout`` back through the product.

        Returns ``np.dot(dout, W.T)``; ``np.dot(x.T, dout)`` is written into
        ``self.grads[0]``.
        """
        (weight,) = self.params
        grad_input = np.dot(dout, weight.T)
        # Assign through [...] so the existing gradient array is overwritten
        # rather than replaced by a new object.
        self.grads[0][...] = np.dot(self.x.T, dout)
        return grad_input

## 1.3.5 梯度的推导和反向传播的实现
### 1.3.5.1 Sigmoid 层

In [16]:
class Sigmoid:
    """Element-wise sigmoid layer, ``1 / (1 + exp(-x))``.

    The layer has no trainable parameters, so ``params`` and ``grads`` are
    empty lists. ``forward`` caches its output, which ``backward`` reuses to
    form the derivative ``out * (1 - out)``.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.out = None  # output of the most recent forward() call

    def forward(self, x):
        """Return the sigmoid of ``x`` and cache it in ``self.out``."""
        # For large negative x, exp(-x) overflows to inf. The quotient
        # 1 / (1 + inf) is still exactly 0.0, which is the correct limit, so
        # only the overflow warning (or FloatingPointError when the caller
        # runs with over='raise') is suppressed; the values are unchanged.
        with np.errstate(over='ignore'):
            out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached forward output.

        ``forward`` must have been called first; otherwise ``self.out`` is
        still ``None`` and the arithmetic fails.
        """
        dx = dout * (1.0 - self.out) * self.out
        return dx

In [17]:
class SGD:
    """Plain gradient-descent optimizer: ``param -= lr * grad``."""

    def __init__(self, lr=0.01):
        """Store the learning rate ``lr``."""
        self.lr = lr

    def update(self, params, grads):
        """Subtract ``lr * grads[k]`` from every ``params[k]``.

        The two sequences are matched by index; one step is taken for each
        entry of ``params``.
        """
        rate = self.lr
        for k in range(len(params)):
            # Augmented assignment on the list slot: NumPy arrays are updated
            # in place, so other references to the same array see the change.
            params[k] -= rate * grads[k]

In [18]:
import sys
# Add the parent directory to the module search path
# (presumably so the `dataset` package imported later resolves — confirm the repo layout).
sys.path.append('..')

In [19]:
from dataset import spiral
import matplotlib.pyplot as plt

In [20]:
# Load the spiral dataset and print the shapes of the two returned arrays.
# NOTE(review): presumably x holds the input points and t the labels — confirm against dataset/spiral.py.
x, t = spiral.load_data()
print('x', x.shape) # (300, 2)
print('t', t.shape) # (300, 3)

x (300, 2)
t (300, 3)
