In [1]:
#考虑一个用于回归任务的简单神经网络，结构如下：

#输入层： 2 个神经元
#隐藏层： 2 个神经元，使用 ReLU 激活函数
#输出层： 1 个神经元，线性输出

In [2]:
import numpy as np

In [5]:
#relu激活函数
def relu(a):
  return np.maximum(0, a)

In [12]:
#前向传播，输出y_pred
def forward(W1, W2, b1, b2, X):
  Z = np.dot(X, W1.T) + b1 #shape (2,2)
  H = relu(Z) #shape (2,2)

  y_pred = np.dot(H, W2.T) + b2 #shape (2,1)

  return H, Z, y_pred

In [9]:
#计算损失loss函数
def compute_loss(y_pred, y_true):
  loss = (y_pred - y_true)**2 / 2
  return loss

In [23]:
#计算反向传播,梯度计算
def backward(y_pred, y_true, W2, H, Z, X):
  y_diff = y_pred - y_true #shape (2,1)

  b2_grad = np.mean(y_diff, axis=0, keepdims=True) #shape (1,1)
  W2_grad = np.dot(y_diff.T, H) #shepe (2,1)

  dh = np.dot(y_diff, W2) #shape (2,2)
  dz = dh * (Z>0)
  b1_grad = np.mean(dz, axis=0, keepdims=True).T #shape (2,1)
  W1_grad = np.dot(dz, X.T) #shape (2,2)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [17]:
#更新参数weight, bias的值
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad):
  W1 -= learning_rate * W1_grad
  W2 -= learning_rate * W2_grad
  b1 -= learning_rate * b1_grad
  b2 -= learning_rate * b2_grad
  return W1, W2, b1, b2

In [24]:
X = np.array([[1.0, 2.0],
        [2.0, 1.0]])

y_true = np.array([[3.0],
        [2.5]])

W1 = np.array([[0.1, 0.2],
        [0.3, 0.4]]) #(输入层到隐藏层) ndarray (2,2)

b1 = np.array([[0.1],
        [0.2]]) #(隐藏层偏置) ndarray (2,1)

W2 = np.array([[0.5, 0.6]]) #(隐藏层到输出层) ndarray (1,2)

b2 = np.array([[0.3]]) #(输出层偏置) ndarray (1,1)


learning_rate = 0.05

epochs = 100

for epoch in range(epochs):
  H, Z, y_pred = forward(W1, W2, b1, b2, X)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, W2, H, Z, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad)

  print("Epoch:", epoch + 1,
    "loss:", np.round(np.mean(loss), 4),
    "W1_grad:", np.round(W1_grad.flatten(), 2),
    "W2_grad:", np.round(W2_grad.flatten(), 2),
    "b1_grad:", np.round(b1_grad.flatten(), 2),
    "b2_grad:", np.round(b2_grad.flatten(), 2)
    )


Epoch: 1 loss: 1.0537 W1_grad: [-2.86 -2.69 -2.01 -1.89] W2_grad: [-1.72 -3.43] b1_grad: [-0.72 -0.86] b2_grad: [-1.43]
Epoch: 2 loss: 0.2012 W1_grad: [-1.78 -1.63 -0.68 -0.62] W2_grad: [-1.22 -1.77] b1_grad: [-0.34 -0.45] b2_grad: [-0.58]
Epoch: 3 loss: 0.0386 W1_grad: [-0.8  -0.73  0.47  0.42] W2_grad: [-0.18 -0.23] b1_grad: [-0.05 -0.06] b2_grad: [-0.07]
Epoch: 4 loss: 0.0365 W1_grad: [-0.68 -0.62  0.61  0.56] W2_grad: [-0.04 -0.04] b1_grad: [-0.01 -0.01] b2_grad: [-0.02]
Epoch: 5 loss: 0.0365 W1_grad: [-0.7  -0.63  0.6   0.55] W2_grad: [-0.05 -0.05] b1_grad: [-0.01 -0.02] b2_grad: [-0.02]
Epoch: 6 loss: 0.0365 W1_grad: [-0.7  -0.63  0.6   0.55] W2_grad: [-0.05 -0.05] b1_grad: [-0.01 -0.02] b2_grad: [-0.02]
Epoch: 7 loss: 0.0366 W1_grad: [-0.7  -0.64  0.61  0.55] W2_grad: [-0.06 -0.05] b1_grad: [-0.01 -0.02] b2_grad: [-0.02]
Epoch: 8 loss: 0.0366 W1_grad: [-0.7  -0.64  0.61  0.55] W2_grad: [-0.06 -0.04] b1_grad: [-0.01 -0.02] b2_grad: [-0.02]
Epoch: 9 loss: 0.0366 W1_grad: [-0.7  -0