In [42]:
import numpy as np

In [43]:
#题目描述：

#假设我们有一个简单的两层全连接神经网络，用于回归任务。网络结构如下：

#输入层：2个神经元
#隐藏层：3个神经元，使用 ReLU 激活函数
#输出层：1个神经元，线性输出

In [44]:
#relu函数
def relu(a):
  return np.maximum(0, a)

In [45]:
#前向传播，求y_pred

def forward(W1, W2, X, b1, b2):
  Z = np.dot(X, W1.T) + b1.T
  H = relu(Z)

  y_pred = np.dot(H, W2.T) + b2
  return Z, H, y_pred

In [46]:
#求损失函数loss的值

def compute_loss(y_pred, y_true):
  loss = np.mean((y_pred - y_true)**2) / 2
  return loss

In [55]:
#反向传播,梯度计算
def backward(y_pred, y_true, W2, H, Z, X):
  y_diff = y_pred - y_true
  b2_grad = np.mean((y_diff), axis=0, keepdims=True)

  W2_grad = np.dot(y_diff.T, H)

  dw2 = np.dot((y_pred-y_true), W2)
  dh = dw2 * (Z>0)
  b1_grad = np.mean(dh, axis=0, keepdims=True).T
  W1_grad = np.dot(dh.T, X)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [48]:
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate):
  W1 -= learning_rate * W1_grad
  W2 -= learning_rate * W2_grad

  b1 -= learning_rate * b1_grad
  b2 -= learning_rate * b2_grad
  return W1, W2, b1, b2

In [56]:
# 数据
X = np.array([[0.5, 0.3],
        [0.8, 0.6],
        [0.2, 0.9]])

y_true = np.array([[0.8], [1.2], [0.5]])

# 初始参数
W1 = np.array([[0.1, 0.2],
        [0.3, 0.4],
        [0.5, 0.6]])

b1 = np.array([[0.1], [0.2], [0.3]])
W2 = np.array([[0.7, 0.8, 0.9]])
b2 = np.array([[0.4]])

# 学习率
learning_rate = 0.01

epochs = 100

for epoch in range(epochs):
  Z, H, y_pred = forward(W1, W2, X, b1, b2)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, W2, H, Z, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate)

  print(
    "Epoch:", epoch + 1,
    "loss", np.round(loss, 4),
    "W1_grad", np.round(W1_grad.flatten(), 2),
    "W2_grad", np.round(W2_grad.flatten(), 2),
    "b1_grad", np.round(b1_grad.flatten(), 2),
    "b2_grad", np.round(b2_grad.flatten(), 2)
  )


Epoch: 1 loss 0.5902 W1_grad [0.98 1.46 1.13 1.67 1.27 1.88] W2_grad [0.87 1.88 2.9 ] b1_grad [0.73 0.84 0.94] b2_grad [1.05]
Epoch: 2 loss 0.4821 W1_grad [0.86 1.3  0.97 1.47 1.08 1.64] W2_grad [0.72 1.62 2.52] b1_grad [0.65 0.73 0.82] b2_grad [0.94]
Epoch: 3 loss 0.3991 W1_grad [0.76 1.17 0.84 1.31 0.93 1.45] W2_grad [0.6  1.41 2.21] b1_grad [0.58 0.65 0.72] b2_grad [0.85]
Epoch: 4 loss 0.3344 W1_grad [0.67 1.06 0.74 1.18 0.81 1.29] W2_grad [0.51 1.23 1.96] b1_grad [0.52 0.58 0.63] b2_grad [0.77]
Epoch: 5 loss 0.2831 W1_grad [0.59 0.97 0.65 1.06 0.71 1.16] W2_grad [0.43 1.09 1.74] b1_grad [0.47 0.52 0.56] b2_grad [0.7]
Epoch: 6 loss 0.242 W1_grad [0.53 0.89 0.57 0.96 0.62 1.04] W2_grad [0.37 0.96 1.56] b1_grad [0.43 0.47 0.5 ] b2_grad [0.64]
Epoch: 7 loss 0.2088 W1_grad [0.47 0.82 0.51 0.88 0.55 0.95] W2_grad [0.32 0.86 1.4 ] b1_grad [0.39 0.42 0.45] b2_grad [0.59]
Epoch: 8 loss 0.1816 W1_grad [0.42 0.75 0.45 0.81 0.48 0.86] W2_grad [0.27 0.77 1.27] b1_grad [0.36 0.38 0.41] b2_grad [