In [23]:
import numpy as np
import pandas as pd

In [3]:
# ReLU activation
def relu(a):
  """Return the element-wise maximum of 0 and `a` (the ReLU function).

  The argument itself is not modified; a new value is returned.
  """
  return np.maximum(0, a)

In [9]:
# Forward pass: compute the hidden activations and the prediction y_pred
def forward(X, W1, W2, B1, B2):
  """Run one forward pass through the hidden layer and the output layer.

  Computes Z = dot(W1, X) + B1, H = relu(Z) and
  y_pred = dot(H, W2.flatten()) + B2.

  Returns:
    The tuple (H, Z, y_pred), in that order.
  """
  pre_activation = np.dot(W1, X) + B1
  hidden = relu(pre_activation)

  # W2 is flattened so the output is a plain dot product with the hidden vector.
  output_weights = W2.flatten()
  return hidden, pre_activation, np.dot(hidden, output_weights) + B2

In [7]:
# Loss function: half squared error
def compute_loss(y_pred, y_true):
  """Return the half squared error (y_pred - y_true) ** 2 / 2, element-wise."""
  residual = y_pred - y_true
  squared_error = residual ** 2
  return squared_error / 2

In [13]:
# Backward pass: compute the gradient of the loss for every parameter
def backward(y_pred, y_true, X, H, Z, W2=None):
  """Back-propagate the half-squared-error loss through the two-layer network.

  Args:
    y_pred: Network output, as returned by forward().
    y_true: Target value(s).
    X: Input vector that was fed to the hidden layer.
    H: Hidden-layer activations, relu(Z).
    Z: Hidden-layer pre-activations; (Z > 0) is used as the ReLU derivative.
    W2: Output-layer weights used in the forward pass. Optional for backward
      compatibility: when omitted, the module-level `W2` is used, which is
      what this function read before the parameter existed.

  Returns:
    The tuple (W1_grad, W2_grad, B1_grad, B2_grad).

  Raises:
    NameError: if W2 is omitted and no module-level `W2` is defined.
  """
  if W2 is None:
    # Legacy call path: fall back to the notebook-global output weights.
    if "W2" not in globals():
      raise NameError("name 'W2' is not defined; pass W2 to backward() explicitly")
    W2 = globals()["W2"]

  # dL/dy_pred for L = (y_pred - y_true) ** 2 / 2; this is also the B2 gradient.
  B2_grad = y_pred - y_true
  W2_grad = np.outer(B2_grad, H)

  # Chain rule through the output weights and the ReLU mask.
  B1_grad = B2_grad * W2.flatten() * (Z > 0)
  W1_grad = np.outer(B1_grad, X)

  return W1_grad, W2_grad, B1_grad, B2_grad

In [20]:
# Gradient-descent update of the weights and biases
def update_param(learning_rate, W1, W2, B1, B2, W1_grad, W2_grad, B1_grad, B2_grad):
  """Apply one gradient-descent step and return the updated parameters.

  Each parameter p with gradient g becomes p - learning_rate * g. The inputs
  are left untouched: new arrays are returned instead of modifying the
  caller's arrays in place, so integer-dtype parameters are also accepted.

  Returns:
    The tuple (W1, W2, B1, B2) of updated parameters.

  Raises:
    ValueError: if a gradient cannot be applied without changing the shape
      of its parameter.
  """
  def _sgd_step(param, grad):
    # One descent step for a single parameter, preserving its shape.
    updated = param - learning_rate * grad
    if np.shape(updated) != np.shape(param):
      raise ValueError(
        f"gradient of shape {np.shape(grad)} does not match "
        f"parameter of shape {np.shape(param)}"
      )
    return updated

  return (
    _sgd_step(W1, W1_grad),
    _sgd_step(W2, W2_grad),
    _sgd_step(B1, B1_grad),
    _sgd_step(B2, B2_grad),
  )

In [71]:
X = np.array([-1.0, 2.0, 1.5])      # input vector, shape (3,)
W1 = np.array([[0.1, -0.2, 0.4],    # hidden-layer weights, shape (2, 3)
               [-0.5, 0.3, 0.2]])

B1 = np.array([0.0, 0.05])          # hidden-layer bias, shape (2,)

W2 = np.array([[0.3, -0.7]])        # output-layer weights, shape (1, 2)
B2 = np.array([0.1])                # output-layer bias, shape (1,)

y_true = np.array([1.0])            # target label
learning_rate = 0.01

gradian_history = []                # one record of flattened gradients per epoch
epochs = 500

for epoch in range(epochs):
  # Forward pass and loss at the current parameters.
  H, Z, y_pred = forward(X, W1, W2, B1, B2)
  loss = compute_loss(y_pred, y_true)

  # No weights are passed to backward(), so it uses the module-level W2.
  W1_grad, W2_grad, B1_grad, B2_grad = backward(y_pred, y_true, X, H, Z)

  gradian_history.append({
    "Epoch": epoch + 1,
    "W1_grad": W1_grad.flatten(),
    "W2_grad": W2_grad.flatten(),
    "B1_grad": B1_grad.flatten(),
    "B2_grad": B2_grad.flatten()
  })

  W1, W2, B1, B2 = update_param(learning_rate, W1, W2, B1, B2, W1_grad, W2_grad, B1_grad, B2_grad)

  # loss is a one-element array; .item() extracts the scalar explicitly because
  # float() on an array with ndim > 0 is deprecated since NumPy 1.25.
  loss_value = np.asarray(loss).item()

  print(
    "Epoch", epoch + 1,
    "Loss", round(loss_value, 8),
    "W1_grad", np.round(W1_grad.flatten(), 2),
    "W2_grad", np.round(W2_grad.flatten(), 2),
    "B1_grad", np.round(B1_grad.flatten(), 2),
    "B2_grad", np.round(B2_grad.flatten(), 2)
  )


Epoch 1 Loss 1.7766125 W1_grad [ 0.57 -1.13 -0.85 -1.32  2.64  1.98] W2_grad [-0.19 -2.73] B1_grad [-0.57  1.32] B2_grad [-1.89]
Epoch 2 Loss 1.51209452 W1_grad [ 0.52 -1.05 -0.79 -1.17  2.34  1.75] W2_grad [-0.26 -2.33] B1_grad [-0.52  1.17] B2_grad [-1.74]
Epoch 3 Loss 1.302699 W1_grad [ 0.49 -0.98 -0.74 -1.05  2.1   1.57] W2_grad [-0.31 -2.01] B1_grad [-0.49  1.05] B2_grad [-1.61]
Epoch 4 Loss 1.13329253 W1_grad [ 0.46 -0.93 -0.69 -0.95  1.89  1.42] W2_grad [-0.35 -1.74] B1_grad [-0.46  0.95] B2_grad [-1.51]
Epoch 5 Loss 0.99373155 W1_grad [ 0.44 -0.88 -0.66 -0.86  1.73  1.29] W2_grad [-0.38 -1.52] B1_grad [-0.44  0.86] B2_grad [-1.41]
Epoch 6 Loss 0.87698841 W1_grad [ 0.42 -0.83 -0.63 -0.79  1.58  1.19] W2_grad [-0.4  -1.34] B1_grad [-0.42  0.79] B2_grad [-1.32]
Epoch 7 Loss 0.77805649 W1_grad [ 0.4  -0.8  -0.6  -0.73  1.46  1.09] W2_grad [-0.42 -1.18] B1_grad [-0.4   0.73] B2_grad [-1.25]
Epoch 8 Loss 0.69328268 W1_grad [ 0.38 -0.76 -0.57 -0.67  1.35  1.01] W2_grad [-0.44 -1.04] B