In [29]:
#考虑一个单隐藏层的神经网络，结构如下：

#输入层：2 个神经元。
#隐藏层：2 个神经元，使用 Sigmoid 激活函数。
#输出层：1 个神经元，使用线性激活函数（即无激活函数）。

In [30]:
import numpy as np

In [31]:
#sigmoid激活函数
def sigmoid(a):
  return 1 / (1 + np.exp(-a))

In [32]:
#前向传播, 输出y_pred
def forward(W1, W2, b1, b2, X):
  Z = np.dot(X, W1) + b1 #ndarray (2,)
  H = sigmoid(Z) #ndarray (2,)

  y_pred = np.dot(H, W2.T) + b2

  return Z, H, y_pred

In [33]:
#求损失函数loss值

def compute_loss(y_pred, y_true):
  loss = (y_pred - y_true)**2 / 2
  return loss

In [34]:
#反向传播函数,求梯度变化
def backward(y_pred, y_true, W2, H, X):
  b2_grad = y_pred - y_true
  W2_grad = b2_grad * H

  bh = b2_grad * W2 # ndarray (2,)
  bz = bh * H * (1-H) # ndarray (2,)
  b1_grad = bz
  W1_grad = np.outer(bz, X)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [35]:
#更新参数weight, bias
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate):
  W1 -= learning_rate * W1_grad
  W2 -= learning_rate * W2_grad
  b1 -= learning_rate * b1_grad
  b2 -= learning_rate * b2_grad
  return W1, W2, b1, b2

In [40]:
#初始化
X = np.array([0.5, 1.0]) # ndarray (2,)

W1 = np.array([[0.1, 0.3],
        [0.2, 0.4]]) # ndarray (2,2)

b1 = np.array([0.1, -0.1]) # ndarray (2,)
W2 = np.array([0.5, 0.7]) # ndarray (2,)

b2 = 0.2

y_true = 2.0
learning_rate = 0.01

epochs = 100

for epoch in range(epochs):
  Z, H, y_pred = forward(W1, W2, b1, b2, X)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, W2, H, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate)

  if epoch%10 ==0:
    print("Epoch:", epoch + 1,
      "loss:", np.round(np.mean(loss), 4),
      "W1_grad:", np.round(W1_grad.flatten(), 2),
      "W2_grad:", np.round(W2_grad.flatten(), 2),
      "b1_grad:", np.round(b1_grad.flatten(), 2),
      "b2_grad:", np.round(b2_grad.flatten(), 2),
      "w1:", np.round(W1, 2),
      "w2:", np.round(W2, 2),
      "b1:", np.round(b1, 2),
      "b2:", np.round(b2, 2),
      )

Epoch: 1 loss: 0.5824 W1_grad: [-0.07 -0.13 -0.09 -0.18] W2_grad: [-0.63 -0.66] b1_grad: [-0.13 -0.18] b2_grad: [-1.08] w1: [[0.1 0.3]
 [0.2 0.4]] w2: [0.51 0.71] b1: [ 0.1 -0.1] b2: 0.21
Epoch: 11 loss: 0.4027 W1_grad: [-0.06 -0.12 -0.08 -0.16] W2_grad: [-0.53 -0.56] b1_grad: [-0.12 -0.16] b2_grad: [-0.9] w1: [[0.11 0.31]
 [0.21 0.42]] w2: [0.56 0.77] b1: [ 0.11 -0.08] b2: 0.31
Epoch: 21 loss: 0.2765 W1_grad: [-0.05 -0.11 -0.07 -0.14] W2_grad: [-0.44 -0.47] b1_grad: [-0.11 -0.14] b2_grad: [-0.74] w1: [[0.11 0.33]
 [0.22 0.43]] w2: [0.61 0.82] b1: [ 0.13 -0.07] b2: 0.39
Epoch: 31 loss: 0.1886 W1_grad: [-0.05 -0.1  -0.06 -0.12] W2_grad: [-0.37 -0.39] b1_grad: [-0.1  -0.12] b2_grad: [-0.61] w1: [[0.12 0.34]
 [0.22 0.45]] w2: [0.65 0.86] b1: [ 0.14 -0.05] b2: 0.46
Epoch: 41 loss: 0.128 W1_grad: [-0.04 -0.08 -0.05 -0.1 ] W2_grad: [-0.31 -0.33] b1_grad: [-0.08 -0.1 ] b2_grad: [-0.51] w1: [[0.12 0.34]
 [0.23 0.46]] w2: [0.69 0.9 ] b1: [ 0.14 -0.04] b2: 0.51
Epoch: 51 loss: 0.0865 W1_grad: [-