In [1]:
import numpy as np

In [3]:
# ReLU activation.
def relu(a):
  """Return the element-wise maximum of 0 and `a`.

  Args:
    a: scalar or array-like accepted by `np.maximum`.

  Returns:
    A new value with every negative entry replaced by 0; the input
    object itself is not modified.
  """
  return np.maximum(0, a)

In [9]:
# Forward pass: compute the hidden activations and the prediction y_pred.
def forward(X, W1, W2, B1, B2):
  """Run one forward pass through the two-layer network.

  Args:
    X: input vector.
    W1: hidden-layer weight matrix, multiplied as `np.dot(W1, X)`.
    W2: output-layer weights; flattened to 1-D before the dot product.
    B1: hidden-layer bias, added to `np.dot(W1, X)`.
    B2: output-layer bias, added to the output dot product.

  Returns:
    A tuple `(H, Z, y_pred)`: the hidden activations after ReLU, the
    hidden pre-activations, and the network output.
  """
  hidden_pre = np.dot(W1, X) + B1
  hidden_act = relu(hidden_pre)

  output_weights = W2.flatten()
  prediction = np.dot(hidden_act, output_weights) + B2
  return hidden_act, hidden_pre, prediction

In [7]:
# Loss: half of the squared error.
def compute_loss(y_pred, y_true):
  """Return `(y_pred - y_true) ** 2 / 2`, computed element-wise.

  Args:
    y_pred: predicted value(s).
    y_true: target value(s).

  Returns:
    The half squared error, with the same shape as `y_pred - y_true`.
  """
  residual = y_pred - y_true
  return residual ** 2 / 2

In [13]:
# Backward pass: compute the gradients of the loss w.r.t. every parameter.
def backward(y_pred, y_true, X, H, Z, W2=None):
  """Compute the gradients of the half-squared-error loss.

  Args:
    y_pred: network output from the forward pass.
    y_true: target value.
    X: input vector used in the forward pass.
    H: hidden activations (after ReLU) from the forward pass.
    Z: hidden pre-activations from the forward pass.
    W2: output-layer weights used in the forward pass. Optional only for
      backward compatibility: when omitted, the module-level `W2` is used,
      which is what this function previously did implicitly.

  Returns:
    A tuple `(W1_grad, W2_grad, B1_grad, B2_grad)`.

  Raises:
    NameError: if `W2` is omitted and no module-level `W2` exists.
  """
  if W2 is None:
    # Older call sites do not pass W2; keep them working by reading the
    # module-level variable, but fail with a clear message if it is absent.
    try:
      W2 = globals()["W2"]
    except KeyError:
      raise NameError("name 'W2' is not defined; pass W2 to backward()") from None

  # d(loss)/d(y_pred) for loss = (y_pred - y_true) ** 2 / 2.
  B2_grad = y_pred - y_true
  W2_grad = np.outer(B2_grad, H)

  # (Z > 0) is the ReLU derivative: it zeroes the gradient of inactive units.
  B1_grad = B2_grad * W2.flatten() * (Z > 0)
  W1_grad = np.outer(B1_grad, X)

  return W1_grad, W2_grad, B1_grad, B2_grad

In [20]:
# Gradient-descent update of the weights and biases.
def update_param(learning_rate, W1, W2, B1, B2, W1_grad, W2_grad, B1_grad, B2_grad):
  """Apply one gradient-descent step and return the updated parameters.

  The inputs are not modified: each result is a new array, so a caller
  that still holds a reference to the old parameters keeps seeing the old
  values. This also avoids the casting error that an in-place `-=` raises
  when a parameter array has an integer dtype.

  Args:
    learning_rate: step size that multiplies every gradient.
    W1, W2, B1, B2: current parameters.
    W1_grad, W2_grad, B1_grad, B2_grad: gradients matching each parameter.

  Returns:
    A tuple `(W1, W2, B1, B2)` with the updated parameters.
  """
  W1 = W1 - learning_rate * W1_grad
  W2 = W2 - learning_rate * W2_grad

  B1 = B1 - learning_rate * B1_grad
  B2 = B2 - learning_rate * B2_grad

  return W1, W2, B1, B2

In [22]:
# Hand-picked inputs and initial parameters for a 3-input, 2-hidden-unit,
# 1-output network, followed by a short gradient-descent loop on one sample.
X = np.array([-1.0, 2.0, 1.5])      # input vector (3,)
W1 = np.array([[0.1, -0.2, 0.4],    # hidden-layer weights (2,3)
        [-0.5, 0.3, 0.2]])

B1 = np.array([0.0, 0.05])          # hidden-layer bias (2,)

W2 = np.array([[0.3, -0.7]])        # output-layer weights (1,2)
B2 = np.array([0.1])                # output-layer bias (1,)

y_true = np.array([1.0])            # target label
learning_rate = 0.01

epochs = 10

# Each iteration runs forward -> loss -> backward -> update on the same
# sample and prints every intermediate result.
for epoch in range(epochs):
  # Forward pass: hidden activations H, pre-activations Z and prediction.
  H,Z, y_pred = forward(X, W1, W2, B1, B2)
  print(f"隐藏节点输出:{H}, 输出Y结果:{y_pred}")

  # Loss for the current prediction.
  loss = compute_loss(y_pred, y_true)
  print(f"求损失函数结果,loss:{loss}")

  # Backward pass. W2 is not passed here; backward() picks up the
  # module-level W2 defined above.
  W1_grad, W2_grad, B1_grad, B2_grad = backward(y_pred, y_true, X, H, Z)
  print(f"求各个参数的梯度:W1梯度{W1_grad.flatten()}, W2梯度{W2_grad}, B1梯度{B1_grad}, B2梯度{B2_grad}")

  # Gradient-descent step; the module-level parameters are rebound to the
  # returned values so the next iteration uses them.
  W1, W2, B1, B2 = update_param(learning_rate, W1, W2, B1, B2, W1_grad, W2_grad, B1_grad, B2_grad)
  print(f"更新参数W1:{W1},W2:{W2},B1{B1},B2{B2}")

隐藏节点输出:[0.1  1.45], 输出Y结果:[-0.885]
求损失函数结果,loss:[1.7766125]
求各个参数的梯度:W1梯度[ 0.5655  -1.131   -0.84825 -1.3195   2.639    1.97925], W2梯度[[-0.1885  -2.73325]], B1梯度[-0.5655  1.3195], B2梯度[-1.885]
更新参数W1:[[ 0.094345  -0.18869    0.4084825]
 [-0.486805   0.27361    0.1802075]],W2:[[ 0.301885  -0.6726675]],B1[0.005655 0.036805],B2[0.11885]
隐藏节点输出:[0.14665375 1.34114125], 输出Y结果:[-0.73901956]
求损失函数结果,loss:[1.51209452]
求各个参数的梯度:W1梯度[ 0.52498392 -1.04996784 -0.78747588 -1.16978194  2.33956389  1.75467291], W2梯度[[-0.25503374 -2.33227087]], B1梯度[-0.52498392  1.16978194], B2梯度[-1.73901956]
更新参数W1:[[ 0.08909516 -0.17819032  0.41635726]
 [-0.47510718  0.25021436  0.16266077]],W2:[[ 0.30443534 -0.64934479]],B1[0.01090484 0.02510718],B2[0.1362402]
隐藏节点输出:[0.18996492 1.24463424], 输出Y结果:[-0.61412453]
求损失函数结果,loss:[1.302699]
求各个参数的梯度:W1梯度[ 0.49139655 -0.98279309 -0.73709482 -1.04812336  2.09624671  1.57218503], W2梯度[[-0.30662704 -2.00899466]], B1梯度[-0.49139655  1.04812336], B2梯度[-1.61412453]
更新参数W1:[[ 0.0