In [1]:
import numpy as np

In [7]:
# Draw a random 2-element input, a 2x3 weight matrix and a 3-element bias
# (uniform samples from [0, 1)).
X = np.random.rand(2)
W = np.random.rand(2, 3)
B = np.random.rand(3)

# Show each array's shape followed by its contents.
print(f"{X.shape} {X}")
print(f"{W.shape} {W}")
print(f"{B.shape} {B}")

(2,) [0.86265914 0.07055705]
(2, 3) [[0.41418177 0.84819412 0.39446697]
 [0.11157898 0.37116207 0.88790638]]
(3,) [0.31705917 0.44045286 0.40781348]


In [13]:
# Verify the matrix product by hand:
# [x1, x2] . [[w11, w12, w13], [w21, w22, w23]] =
# [x1*w11 + x2*w21, x1*w12 + x2*w22, x1*w13 + x2*w23]

# Unpack the operands from the live arrays rather than pasting in numbers
# printed by an earlier run: X and W are random, so hard-coded copies stop
# matching np.dot(X, W) as soon as the notebook is re-executed.
x1, x2 = X
(w11, w12, w13), (w21, w22, w23) = W

print(x1*w11+x2*w21, x1*w12+x2*w22, x1*w13+x2*w23)
print(np.dot(X, W))

# Fail loudly if the hand-expanded formula and np.dot ever disagree.
np.testing.assert_allclose(
    [x1*w11+x2*w21, x1*w12+x2*w22, x1*w13+x2*w23],
    np.dot(X, W),
)

# Forward pass of the affine transform: product plus bias.
Y = np.dot(X, W) + B
print(Y)

0.3651703731826868 0.7578905108433502 0.4029385919475848
[0.36517038 0.75789051 0.40293859]
[0.68222955 1.19834337 0.81075207]


正向传播的式子：<p/>
$ X \cdot W + B = Y $ <p/>
L 相对 X 的导数为：<p/>
$
\dfrac{\partial L}{\partial X} = \dfrac{\partial L}{\partial Y} \cdot W^{T}
$ <p/>
L 相对 W 的导数为：<p/>
$
\dfrac{\partial L}{\partial W} = X^{T} \cdot \dfrac{\partial L}{\partial Y}
$

In [15]:
# Example upstream gradient with two rows and three columns.
dY = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(dY)

# Summing along axis 0 adds the rows together, leaving one value per column.
dB = dY.sum(axis=0)
print(dB)

[[1 2 3]
 [4 5 6]]
[5 7 9]


In [None]:
class Affine:
    """Affine layer computing ``out = X . W + b`` with a matching backward pass.

    Attributes:
        W: Weight matrix passed to the constructor.
        b: Bias passed to the constructor.
        X: Input saved by the most recent ``forward`` call (``None`` until then).
        dW: Gradient with respect to ``W`` from the most recent ``backward`` call.
        db: Gradient with respect to ``b`` from the most recent ``backward`` call.
    """

    def __init__(self, W, b):
        """Store the parameters and reset the cached input and gradients."""
        self.W = W
        self.b = b
        self.X = None
        self.dW = None
        self.db = None

    def forward(self, X):
        """Return ``np.dot(X, W) + b`` and remember ``X`` for ``backward``.

        Args:
            X: Input array; either a single 1-D sample or a 2-D array with one
                sample per row.

        Returns:
            The affine transform of ``X``.
        """
        self.X = X
        return np.dot(X, self.W) + self.b

    def backward(self, dout):
        """Compute the gradients for the last ``forward`` call.

        Stores the parameter gradients in ``self.dW`` and ``self.db``.

        Args:
            dout: Gradient with respect to the output of ``forward``, with the
                same shape as that output.

        Returns:
            The gradient with respect to the input, ``np.dot(dout, W.T)``.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if self.X is None:
            raise RuntimeError("Affine.backward() called before forward()")

        # Treat a single 1-D sample as a one-row batch. Without this,
        # np.dot(X.T, dout) fails for 1-D operands of different lengths and
        # summing dout over axis 0 would collapse the bias gradient to a scalar.
        X_2d = np.atleast_2d(self.X)
        dout_2d = np.atleast_2d(dout)

        # dL/dX = dout . W^T (uses the original dout so dX keeps its rank)
        dX = np.dot(dout, self.W.T)
        # dL/dW = X^T . dout
        self.dW = np.dot(X_2d.T, dout_2d)
        # dL/db = sum of dout over the row axis
        self.db = np.sum(dout_2d, axis=0)
        return dX