In [1]:
#考虑一个单隐藏层的神经网络，结构如下：

#输入层：2 个神经元。
#隐藏层：2 个神经元，使用 Sigmoid 激活函数。
#输出层：1 个神经元，使用线性激活函数（即无激活函数）。

In [2]:
import numpy as np

In [7]:
#sigmoid激活函数
def sigmoid(a):
  return 1 / 1 + np.exp(-a)

In [25]:
#前向传播, 输出y_pred
def forward(W1, W2, b1, b2, X):
  Z = np.dot(X, W1) + b1 #ndarray (2,)
  H = sigmoid(Z) #ndarray (2,)

  y_pred = np.dot(H, W2.T) + b2

  return Z, H, y_pred

In [14]:
#求损失函数loss值

def compute_loss(y_pred, y_true):
  loss = (y_pred - y_true)**2 / 2
  return loss

In [22]:
#反向传播函数,求梯度变化
def backward(y_pred, y_true, W2, H, X):
  b2_grad = y_pred - y_true
  W2_grad = b2_grad * H

  bh = b2_grad * W2 # ndarray (2,)
  bz = bh * H * (1-H) # ndarray (2,)
  b1_grad = bz
  W1_grad = np.outer(bz, X)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [23]:
#更新参数weight, bias
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate):
  W1 -= learning_rate * W1_grad
  W2 -= learning_rate * W2_grad
  b1 -= learning_rate * b1_grad
  b2 -= learning_rate * b2_grad
  return W1, W2, b1, b2

In [27]:
#初始化
X = np.array([0.5, 1.0]) # ndarray (2,)

W1 = np.array([[0.1, 0.3],
        [0.2, 0.4]]) # ndarray (2,2)

b1 = np.array([0.1, -0.1]) # ndarray (2,)
W2 = np.array([0.5, 0.7]) # ndarray (2,)

b2 = 0.2

y_true = 2.0
learning_rate = 0.01

epochs = 40

for epoch in range(epochs):
  Z, H, y_pred = forward(W1, W2, b1, b2, X)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, W2, H, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate)

  print("Epoch:", epoch + 1,
    "loss:", np.round(np.mean(loss), 4),
    "W1_grad:", np.round(W1_grad.flatten(), 2),
    "W2_grad:", np.round(W2_grad.flatten(), 2),
    "b1_grad:", np.round(b1_grad.flatten(), 2),
    "b2_grad:", np.round(b2_grad.flatten(), 2),
    "w1:", np.round(W1, 2),
    "w2:", np.round(W2, 2),
    "b1:", np.round(b1, 2),
    "b2:", np.round(b2, 2),
    )

Epoch: 1 loss: 0.0197 W1_grad: [-0.06 -0.12 -0.07 -0.15] W2_grad: [0.34 0.33] b1_grad: [-0.12 -0.15] b2_grad: [0.2] w1: [[0.1 0.3]
 [0.2 0.4]] w2: [0.5 0.7] b1: [ 0.1 -0.1] b2: 0.2
Epoch: 2 loss: 0.0168 W1_grad: [-0.05 -0.11 -0.07 -0.13] W2_grad: [0.31 0.3 ] b1_grad: [-0.11 -0.13] b2_grad: [0.18] w1: [[0.1 0.3]
 [0.2 0.4]] w2: [0.49 0.69] b1: [ 0.1 -0.1] b2: 0.2
Epoch: 3 loss: 0.0143 W1_grad: [-0.05 -0.1  -0.06 -0.12] W2_grad: [0.29 0.28] b1_grad: [-0.1  -0.12] b2_grad: [0.17] w1: [[0.1 0.3]
 [0.2 0.4]] w2: [0.49 0.69] b1: [ 0.1 -0.1] b2: 0.19
Epoch: 4 loss: 0.0122 W1_grad: [-0.05 -0.09 -0.06 -0.11] W2_grad: [0.27 0.25] b1_grad: [-0.09 -0.11] b2_grad: [0.16] w1: [[0.1  0.3 ]
 [0.2  0.41]] w2: [0.49 0.69] b1: [ 0.1  -0.09] b2: 0.19
Epoch: 5 loss: 0.0104 W1_grad: [-0.04 -0.08 -0.05 -0.1 ] W2_grad: [0.24 0.23] b1_grad: [-0.08 -0.1 ] b2_grad: [0.14] w1: [[0.1  0.31]
 [0.2  0.41]] w2: [0.49 0.69] b1: [ 0.11 -0.09] b2: 0.19
Epoch: 6 loss: 0.0089 W1_grad: [-0.04 -0.08 -0.05 -0.09] W2_grad: [0