In [69]:
import numpy as np

In [70]:
# ReLU activation function
def relu(a):
  """Return the element-wise maximum of 0 and `a`, computed with np.maximum."""
  return np.maximum(0, a)

In [71]:
# Forward pass
def forward(X, W1, B1, W2, B2, W3, B3):
  """Run the forward pass: two ReLU hidden layers, then a linear output.

  Each hidden layer computes np.dot(W, layer_input) + B and feeds the result
  through relu. The output layer computes np.dot(W3, H2) + B3 with no
  activation.

  Args:
    X: Network input.
    W1, B1: Weight and bias of the first hidden layer.
    W2, B2: Weight and bias of the second hidden layer.
    W3, B3: Weight and bias of the output layer.

  Returns:
    Tuple (Z1, H1, Z2, H2, y_pred): the pre-activation and activation of each
    hidden layer, followed by the prediction.
  """
  pre_activations = []
  activations = []
  layer_input = X
  for weight, bias in ((W1, B1), (W2, B2)):
    pre_act = np.dot(weight, layer_input) + bias
    layer_input = relu(pre_act)
    pre_activations.append(pre_act)
    activations.append(layer_input)

  # The output layer is linear: the prediction is not passed through relu.
  prediction = np.dot(W3, layer_input) + B3

  return pre_activations[0], activations[0], pre_activations[1], activations[1], prediction

In [72]:
# Compute the loss
def compute_loss(y_true=None, y_pred=None):
  """Return the halved squared error, (y_true - y_pred)**2 / 2.

  Args:
    y_true: Target value(s). When omitted, the module-level `y_true` is used,
      which keeps existing zero-argument calls working.
    y_pred: Predicted value(s). When omitted, the module-level `y_pred` is
      used.

  Returns:
    The element-wise loss of the two inputs.

  Raises:
    NameError: If an argument is omitted and no module-level variable with
      that name exists.
  """
  # Fall back to the module globals only for arguments that were not supplied;
  # passing the values explicitly avoids depending on hidden notebook state.
  module_scope = globals()
  try:
    target = module_scope["y_true"] if y_true is None else y_true
    prediction = module_scope["y_pred"] if y_pred is None else y_pred
  except KeyError as missing:
    raise NameError(f"name {missing} is not defined") from None

  return (target - prediction)**2 / 2

In [73]:
# Backward pass
def backward(y_pred, y_true, W1, W2, H1, H2, Z1, Z2, X, W3=None):
  """Compute the gradients of the halved squared error for every parameter.

  The network is X -> (W1, B1) -> relu -> (W2, B2) -> relu -> (W3, B3), and
  the loss is (y_true - y_pred)**2 / 2, so dL/dy_pred = y_pred - y_true.

  Args:
    y_pred: Prediction produced by the forward pass.
    y_true: Target value.
    W1: Unused; kept so existing positional calls keep working.
    W2: Weight of the second hidden layer.
    H1, H2: Activations of the first and second hidden layers.
    Z1, Z2: Pre-activations of the first and second hidden layers.
    X: Network input.
    W3: Weight of the output layer. When omitted, the module-level `W3` is
      used, which keeps existing calls that do not pass it working.

  Returns:
    Tuple (B3_grad, W3_grad, B2_grad, W2_grad, B1_grad, W1_grad).

  Raises:
    NameError: If `W3` is omitted and no module-level `W3` exists.
  """
  if W3 is None:
    # Backward-compatible fallback for callers that rely on the global W3.
    try:
      W3 = globals()["W3"]
    except KeyError:
      raise NameError("name 'W3' is not defined") from None

  y_diff = y_pred - y_true

  B3_grad = y_diff
  W3_grad = y_diff * H2

  # Error signal at Z2: the output error routed back through W3, gated by the
  # ReLU derivative (1 where Z2 > 0, else 0). It is also the gradient of B2.
  B2_grad = np.ravel(y_diff * W3 * (Z2 > 0))
  W2_grad = np.outer(B2_grad, H1)

  # Error signal at Z1: sum the Z2 error over the second hidden layer through
  # W2, then gate by the ReLU derivative of Z1.
  B1_grad = np.ravel(np.dot(B2_grad, W2) * (Z1 > 0))
  # A weight gradient is the outer product of the layer's error signal and its
  # input. The previous element-wise product skipped the sum over the second
  # hidden layer and only ran when that layer's size equalled the input size.
  W1_grad = np.outer(B1_grad, X)

  return B3_grad, W3_grad, B2_grad, W2_grad, B1_grad, W1_grad

In [74]:
# Update the weight and bias parameters
def update_param(lr, W1, W2, W3, B1, B2, B3, W1_grad, W2_grad, W3_grad, B1_grad, B2_grad, B3_grad):
  """Apply one gradient-descent step, `param -= lr * grad`, to each parameter.

  The step uses augmented assignment, so NumPy arrays passed in are modified
  in place and the same objects are returned.

  Args:
    lr: Learning rate that scales every gradient.
    W1, W2, W3, B1, B2, B3: Parameters to update.
    W1_grad, W2_grad, W3_grad, B1_grad, B2_grad, B3_grad: Matching gradients.

  Returns:
    Tuple (W1, W2, W3, B1, B2, B3) after the update.
  """
  pairs = (
    (W1, W1_grad), (W2, W2_grad), (W3, W3_grad),
    (B1, B1_grad), (B2, B2_grad), (B3, B3_grad),
  )
  stepped = []
  for value, gradient in pairs:
    value -= lr * gradient
    # Collect the (possibly rebound) value so immutable inputs are returned
    # updated as well.
    stepped.append(value)
  return tuple(stepped)

In [77]:
# Set up a small 3 -> 4 -> 3 -> 1 network and run a few gradient-descent steps
# on a single sample, printing every intermediate value along the way.
np.random.seed(42)  # fix the random seed so results can be compared across runs

# Input features
X = np.array([1.5, -2.0, 3.0])  # (3,)

# Weights and bias of hidden layer 1 (H1)
W1 = np.random.randn(4, 3)  # (4,3)
B1 = np.random.randn(4)  # (4,)

# Weights and bias of hidden layer 2 (H2)
W2 = np.random.randn(3, 4)  # (3,4)
B2 = np.random.randn(3)  # (3,)

# Weights and bias of the output layer
W3 = np.random.randn(1, 3)  # (1,3)
B3 = np.random.randn(1)  # (1,)

# Target value (ground truth)
y_true = np.array([2.0])

# Learning rate
lr = 0.01

print(f"X:\n{X}\n, (W1:\n{W1}\n, B1:\n{B1}\n,) (W2:\n{W2}\n, B2:\n{B2}\n,) (W3:\n{W3}\n, B3:\n{B3}\n)")

epochs = 10

# Each iteration: forward pass, loss, backward pass, parameter update.
for epoch in range(epochs):
  Z1, H1, Z2, H2, y_pred = forward(X, W1, B1, W2, B2, W3, B3)
  print(f"Z1:{Z1}, H1:{H1}, Z2:{Z2}, H2:{H2}, y_pred:{y_pred}")

  # Called without arguments, so the loss is computed from the module-level
  # y_true and the y_pred assigned just above.
  loss = compute_loss()
  print(f"loss:{loss}")

  # W3 is not passed here; backward reads the module-level W3.
  B3_grad, W3_grad, B2_grad, W2_grad, B1_grad, W1_grad = backward(y_pred, y_true, W1, W2, H1, H2, Z1, Z2, X)
  print(f"B3_grad:{B3_grad}, W3_grad:{W3_grad}, B2_grad:{B2_grad}, W2_grad:{W2_grad}, B1_grad:{B1_grad}, W1_grad:{W1_grad}")

  # Rebind the parameters to the values returned by the update step.
  W1, W2, W3, B1, B2, B3 = update_param(lr, W1, W2, W3, B1, B2, B3, W1_grad, W2_grad, W3_grad, B1_grad, B2_grad, B3_grad)
  print(f"param update: B3:{B3}, W3:{W3}, B2:{B2}, W2:{W2}, B1:{B1}, W1:{W1}")

X:
[ 1.5 -2.   3. ]
, (W1:
[[ 0.49671415 -0.1382643   0.64768854]
 [ 1.52302986 -0.23415337 -0.23413696]
 [ 1.57921282  0.76743473 -0.46947439]
 [ 0.54256004 -0.46341769 -0.46572975]]
, B1:
[ 0.24196227 -1.91328024 -1.72491783 -0.56228753]
,) (W2:
[[-1.01283112  0.31424733 -0.90802408 -1.4123037 ]
 [ 1.46564877 -0.2257763   0.0675282  -1.42474819]
 [-0.54438272  0.11092259 -1.15099358  0.37569802]]
, B2:
[-0.60063869 -0.29169375 -0.60170661]
,) (W3:
[[ 1.85227818 -0.01349722 -1.05771093]]
, B3:
[0.82254491]
)
Z1:[ 3.20662772  0.13716042 -2.29939123 -0.21880134], H1:[3.20662772 0.13716042 0.         0.        ], Z2:[-3.80530874  4.37712865 -2.33212516], H2:[0.         4.37712865 0.        ], y_pred:[0.76346582]
loss:[0.76450839]
B3_grad:[-1.23653418], W3_grad:[-0.         -5.41246917 -0.        ], B2_grad:[-0.          0.01668978  0.        ], W2_grad:[[-0.         -0.         -0.         -0.        ]
 [ 0.05351791  0.00228918  0.          0.        ]
 [ 0.          0.          0.      