In [21]:
#考虑一个简单的神经网络，用于回归任务。该网络结构如下：

#输入层： 1 个神经元
#隐藏层： 2 个神经元，使用 Sigmoid 激活函数
#输出层： 1 个神经元，线性输出

In [22]:
import numpy as np

In [23]:
#计算sigmoid激活函数
def sigmoid(a):
  return 1 / (1 + np.exp(-a))

In [24]:
#前向传播,求出y_pred的值
def forword(W1, W2, b1, b2, X):
  Z = np.dot(X, W1.T) + b1.T # shape (2,2)
  H = sigmoid(Z) # shape (2,2)

  y_pred = np.dot(H, W2.T) + b2

  return H, y_pred

In [25]:
#求损失函数loss值,
def compute_loss(y_true, y_pred):
  loss = (y_true - y_pred)**2 / 2
  return loss

In [26]:
#求反向传播，梯度计算
def backward(y_pred, y_true, H, W2, X):
  y_diff = y_pred - y_true # shape(2,1)

  b2_grad = np.mean(y_diff).T # shape(1,1)
  W2_grad = np.dot(y_diff.T, H)

  dh = np.dot(y_diff, W2) # shape(2,2)
  dz = dh * (1 - H**2) # shape(2,2)
  b1_grad = np.mean(dh, axis=0, keepdims=True).T # shape(2,1)
  W1_grad = np.dot(dz, X) # shape(2,1)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [27]:
#更新参数weight, bias
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate):
  W1 -= learning_rate * W1_grad
  W2 -= learning_rate * W2_grad
  b1 -= learning_rate * b1_grad
  b2 -= learning_rate * b2_grad

  return W1, W2, b1, b2

In [29]:
X = np.array([[0.5],
        [1.0]]) # ndarray (2,1)

y_true = np.array([[0.75],
        [1.5]]) # ndarray (2,1)

W1 = np.array([[0.2],
        [0.3]]) # ndarray (2,1)

b1 = np.array([[0.1],
        [0.2]]) # ndarray (2,1)

W2 = np.array([[0.4, 0.5]]) # ndarray (1,2)
b2 = np.array([[0.6]]) # ndarray (1,1)

learning_rate = 0.01

epochs = 10000

for epoch in range(epochs):
  H, y_pred = forword(W1, W2, b1, b2, X)
  loss = compute_loss(y_true, y_pred)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, H, W2, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate)

  if epoch % 100 == 0:
    print("Epoch:", epoch + 1,
      "loss:", np.round(np.mean(loss), 4),
      "W1_grad:", np.round(W1_grad.flatten(), 2),
      "W2_grad:", np.round(W2_grad.flatten(), 2),
      "b1_grad:", np.round(b1_grad.flatten(), 2),
      "b2_grad:", np.round(b2_grad.flatten(), 2)
      )

Epoch: 1 loss: 0.0652 W1_grad: [ 0.17 -0.16] W2_grad: [-0.01 -0.01] b1_grad: [0. 0.] b2_grad: [0.]
Epoch: 101 loss: 0.0651 W1_grad: [ 0.17 -0.15] W2_grad: [ 0.   -0.01] b1_grad: [0. 0.] b2_grad: [0.01]
Epoch: 201 loss: 0.065 W1_grad: [ 0.17 -0.15] W2_grad: [ 0.01 -0.02] b1_grad: [0. 0.] b2_grad: [0.01]
Epoch: 301 loss: 0.0648 W1_grad: [ 0.17 -0.15] W2_grad: [ 0.02 -0.02] b1_grad: [0. 0.] b2_grad: [0.01]
Epoch: 401 loss: 0.0644 W1_grad: [ 0.17 -0.14] W2_grad: [ 0.03 -0.03] b1_grad: [0. 0.] b2_grad: [0.01]
Epoch: 501 loss: 0.0637 W1_grad: [ 0.17 -0.13] W2_grad: [ 0.03 -0.03] b1_grad: [0. 0.] b2_grad: [0.01]
Epoch: 601 loss: 0.0627 W1_grad: [ 0.16 -0.12] W2_grad: [ 0.04 -0.03] b1_grad: [0. 0.] b2_grad: [0.01]
Epoch: 701 loss: 0.0613 W1_grad: [ 0.16 -0.11] W2_grad: [ 0.04 -0.03] b1_grad: [0.   0.01] b2_grad: [0.01]
Epoch: 801 loss: 0.0597 W1_grad: [ 0.15 -0.1 ] W2_grad: [ 0.05 -0.03] b1_grad: [0.   0.01] b2_grad: [0.01]
Epoch: 901 loss: 0.0578 W1_grad: [ 0.14 -0.09] W2_grad: [ 0.05 -0.03] 