In [None]:
#假设有一个简单的神经网络，
#包含一个输入层（2个神经元）
#一个隐藏层（3个神经元，使用 ReLU 激活函数）
#一个输出层（1个神经元，使用线性激活函数）。

#我们用均方误差（Mean Squared Error, MSE）作为损失函数。

In [None]:
import numpy as np

In [None]:
#求relu激活函数
def relu(a):
  return np.maximum(0, a)

In [30]:
#前向传播，求输出y_pred

def forward(W1, W2, b1, b2, X):
  Z = np.dot(X, W1) + b1 #shape (1,3)
  H = relu(Z) #shape (3,)

  y_pred = np.mean(W2 * H)
  return Z, H, y_pred

In [None]:
#求损失函数, loss值
def compute_loss(y_pred, y_true):
  loss = (y_pred - y_true)**2 / 2
  return loss

In [34]:
#反向传播，求梯度函数
def backward(y_pred, y_true, H, Z, W2, X):
  b2_grad = y_pred - y_true
  W2_grad = b2_grad * H

  b1_grad = b2_grad * W2 * (Z>0)
  W1_grad = np.outer(X, b1_grad)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [None]:
#更新参数，weight,bias
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, lr):
  W1 -= lr * W1_grad
  W2 -= lr * W2_grad
  b1 -= lr * b1_grad
  b2 -= lr * b2_grad
  return W1, W2, b1, b2

In [33]:
X = np.array([1.0, 2.0]) #ndarray(2,)（一个样本，2个特征）

y_true = 3.0

W1 = np.array([[0.5, 0.1, -0.3],
        [0.2, 0.4, 0.6]]) #ndarray(2,3)

b1 = np.array([0.1, 0.2, 0.3]) #ndarray(3,)

W2 = np.array([0.3, 0.7, -0.5]) #ndarray(3,) （形状为 3×1）

lr = 0.01

b2 = 0.1

epochs = 50

for epoch in range(epochs):
  Z, H, y_pred = forward(W1, W2, b1, b2, X)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, H, Z, W2, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, lr)

  print("Epoch:", epoch + 1,
    "loss:", np.round(loss, 4),
    "W1_grad", np.round(W1_grad.flatten(), 2),
    "W2_grad", np.round(W2_grad.flatten(), 2),
    "b1_grad", np.round(b1_grad.flatten(), 2),
    "b2_grad", np.round(b2_grad.flatten(), 2)
    )

Epoch: 1 loss: 4.0423 W1_grad [-0.85 -1.99  1.42 -1.71 -3.98  2.84] W2_grad [-2.84 -3.13 -3.41] b1_grad [-0.85 -1.99  1.42] b2_grad [-2.84]
Epoch: 2 loss: 3.811 W1_grad [-0.91 -2.02  1.29 -1.81 -4.04  2.57] W2_grad [-2.9  -3.37 -3.08] b1_grad [-0.91 -2.02  1.29] b2_grad [-2.76]
Epoch: 3 loss: 3.583 W1_grad [-0.96 -2.05  1.16 -1.91 -4.1   2.33] W2_grad [-2.96 -3.59 -2.78] b1_grad [-0.96 -2.05  1.16] b2_grad [-2.68]
Epoch: 4 loss: 3.3558 W1_grad [-1.   -2.07  1.06 -2.01 -4.15  2.11] W2_grad [-3.01 -3.79 -2.51] b1_grad [-1.   -2.07  1.06] b2_grad [-2.59]
Epoch: 5 loss: 3.128 W1_grad [-1.04 -2.1   0.96 -2.09 -4.2   1.91] W2_grad [-3.06 -3.97 -2.26] b1_grad [-1.04 -2.1   0.96] b2_grad [-2.5]
Epoch: 6 loss: 2.8988 W1_grad [-1.08 -2.12  0.87 -2.16 -4.23  1.73] W2_grad [-3.1  -4.13 -2.04] b1_grad [-1.08 -2.12  0.87] b2_grad [-2.41]
Epoch: 7 loss: 2.6683 W1_grad [-1.11 -2.12  0.78 -2.21 -4.25  1.57] W2_grad [-3.12 -4.25 -1.84] b1_grad [-1.11 -2.12  0.78] b2_grad [-2.31]
Epoch: 8 loss: 2.4374 W1