In [1]:
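# Consider a neural network with a single hidden layer, structured as follows:

# Input layer: 3 neurons.
# Hidden layer: 2 neurons with the ReLU activation function.
# Output layer: 1 neuron with a linear activation (i.e. no activation function).
# The loss is the squared error in the style of Mean Squared Error (MSE); for the single sample used here it is implemented as (y_pred - y_true)**2 / 2.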

In [2]:
import numpy as np

In [4]:
# ReLU activation function.
def relu(a):
  """Return the element-wise rectified linear unit of `a`, i.e. max(0, a).

  Args:
    a: A scalar or array-like of pre-activation values.

  Returns:
    The element-wise maximum of 0 and `a`, as computed by `np.maximum`.
  """
  rectified = np.maximum(0, a)
  return rectified

In [7]:
# Forward pass: compute the hidden activations and the prediction y_pred.
def forward(W1, W2, b1, b2, X):
  """Run the forward pass of the one-hidden-layer network.

  Args:
    W1: Hidden-layer weights; contracted with the last axis of `X`
      (shape (3, 2) in this notebook).
    W2: Output-layer weights; either a 1-D vector over the hidden units
      (shape (2,) in this notebook) or an (outputs, hidden) matrix.
    b1: Hidden-layer bias, broadcast onto `X @ W1`.
    b2: Output-layer bias.
    X: A single sample (1-D) or a batch of samples with one sample per row.

  Returns:
    A tuple `(Z, H, y_pred)`: the hidden pre-activation, the hidden ReLU
    activation, and the linear output of the network.
  """
  Z = np.dot(X, W1) + b1
  H = relu(Z)

  # Keep the layer input on the left, as in the first layer, so that each row
  # of H is treated as one sample. For a single 1-D sample this is the same
  # dot product as before; for a batch it yields one prediction per row
  # instead of contracting W2 against the sample axis. np.transpose is a
  # no-op on a 1-D W2 and turns an (outputs, hidden) matrix into the
  # (hidden, outputs) form needed on the right-hand side.
  y_pred = np.dot(H, np.transpose(W2)) + b2
  return Z, H, y_pred

In [9]:
# Loss function.
def compute_loss(y_pred, y_true):
  """Return the halved squared error between prediction and target.

  Args:
    y_pred: Predicted value(s).
    y_true: Target value(s).

  Returns:
    `(y_pred - y_true)**2 / 2`, element-wise. The factor 1/2 makes the
    derivative with respect to `y_pred` simply `y_pred - y_true`.
  """
  residual = y_pred - y_true
  return residual ** 2 / 2

In [16]:
# Backward pass: compute the gradients of the loss.
def backward(y_pred, y_true, W2, H, Z, X):
  """Back-propagate the loss `(y_pred - y_true)**2 / 2` through the network.

  Written for a single sample: `np.outer` flattens its arguments, so `X` is
  expected to be 1-D.

  Args:
    y_pred: Network output from the forward pass.
    y_true: Target value.
    W2: Output-layer weights.
    H: Hidden-layer ReLU activation from the forward pass.
    Z: Hidden-layer pre-activation from the forward pass.
    X: Input sample.

  Returns:
    A tuple `(W1_grad, W2_grad, b1_grad, b2_grad)` of gradients of the loss
    with respect to the corresponding parameters.
  """
  # dL/dy_pred; the output layer is linear, so this is also dL/db2.
  output_delta = y_pred - y_true

  # Error propagated back to the hidden activations, then gated by the ReLU
  # derivative (1 where Z > 0, else 0).
  hidden_error = output_delta * W2
  relu_mask = Z > 0
  hidden_delta = hidden_error * relu_mask

  W2_grad = output_delta * H
  W1_grad = np.outer(X, hidden_delta)

  return W1_grad, W2_grad, hidden_delta, output_delta

In [17]:
# Parameter update: one gradient-descent step on the weights and biases.
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, lr):
  """Apply one gradient-descent step and return the updated parameters.

  The update is computed out of place: the arrays passed in are left
  untouched and new values are returned. This avoids silently modifying the
  caller's arrays (the scalar bias was never modified in place anyway) and
  also works when a parameter array has an integer dtype, where an in-place
  `-=` with a float step fails NumPy's casting check.

  Args:
    W1, W2, b1, b2: Current parameters.
    W1_grad, W2_grad, b1_grad, b2_grad: Gradients of the loss with respect
      to the corresponding parameters.
    lr: Learning rate (step size).

  Returns:
    A tuple `(W1, W2, b1, b2)` with each parameter replaced by
    `param - lr * grad`.
  """
  W1 = W1 - lr * W1_grad
  W2 = W2 - lr * W2_grad
  b1 = b1 - lr * b1_grad
  b2 = b2 - lr * b2_grad
  return W1, W2, b1, b2

In [18]:
# Single training sample and its regression target.
X = np.array([1.0, 0.5, -0.5])  # shape (3,)
y_true = 1.5

# Initial parameters of the hidden layer.
W1 = np.array([
  [0.2, 0.1],
  [0.3, -0.4],
  [0.5, 0.2],
])  # shape (3, 2)
b1 = np.array([0.1, 0.0])  # shape (2,)

# Initial parameters of the output layer.
W2 = np.array([0.4, 0.6])  # shape (2,)
b2 = 0.2

# Hyperparameters.
lr = 0.01
epochs = 100

for epoch in range(epochs):
  # Forward pass, loss, gradients, then one gradient-descent step.
  Z, H, y_pred = forward(W1, W2, b1, b2, X)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, W2, H, Z, X)
  W1, W2, b1, b2 = update_params(
    W1, W2, b1, b2,
    W1_grad, W2_grad, b1_grad, b2_grad,
    lr,
  )

  # The reported loss and gradients are the ones computed before this
  # epoch's parameter update.
  print(
    "Epoch:", epoch + 1,
    "loss:", np.round(loss, 4),
    "W1_grad", np.round(W1_grad.flatten(), 2),
    "W2_grad", np.round(W2_grad.flatten(), 2),
    "b1_grad", np.round(b1_grad.flatten(), 2),
    "b2_grad", np.round(b2_grad.flatten(), 2),
  )

Epoch: 1 loss: 0.7442 W1_grad [-0.49 -0.   -0.24 -0.    0.24  0.  ] W2_grad [-0.24 -0.  ] b1_grad [-0.49 -0.  ] b2_grad [-1.22]
Epoch: 2 loss: 0.7229 W1_grad [-0.48 -0.   -0.24 -0.    0.24  0.  ] W2_grad [-0.26 -0.  ] b1_grad [-0.48 -0.  ] b2_grad [-1.2]
Epoch: 3 loss: 0.702 W1_grad [-0.48 -0.   -0.24 -0.    0.24  0.  ] W2_grad [-0.27 -0.  ] b1_grad [-0.48 -0.  ] b2_grad [-1.18]
Epoch: 4 loss: 0.6816 W1_grad [-0.48 -0.   -0.24 -0.    0.24  0.  ] W2_grad [-0.28 -0.  ] b1_grad [-0.48 -0.  ] b2_grad [-1.17]
Epoch: 5 loss: 0.6617 W1_grad [-0.47 -0.   -0.24 -0.    0.24  0.  ] W2_grad [-0.29 -0.  ] b1_grad [-0.47 -0.  ] b2_grad [-1.15]
Epoch: 6 loss: 0.6422 W1_grad [-0.47 -0.   -0.23 -0.    0.23  0.  ] W2_grad [-0.29 -0.  ] b1_grad [-0.47 -0.  ] b2_grad [-1.13]
Epoch: 7 loss: 0.6231 W1_grad [-0.46 -0.   -0.23 -0.    0.23  0.  ] W2_grad [-0.3 -0. ] b1_grad [-0.46 -0.  ] b2_grad [-1.12]
Epoch: 8 loss: 0.6044 W1_grad [-0.46 -0.   -0.23 -0.    0.23  0.  ] W2_grad [-0.31 -0.  ] b1_grad [-0.46 -0.