In [1]:
#题目描述：

#考虑一个用于二元分类任务的简单神经网络，其结构如下：

#输入层： 2 个神经元
#隐藏层： 2 个神经元，使用 Sigmoid 激活函数
#输出层： 1 个神经元，使用 Sigmoid 激活函数

In [2]:
import numpy as np

In [6]:
#sigmoid激活函数
def sigmoid(a):
  return 1/(1 + np.exp(-a))

In [7]:
#前向传播,求得输出y_pred
def forward(W1, W2, X, b1, b2):
  Z = np.dot(W1, X) + b1 #shape (2,2)
  H = sigmoid(Z) #shape (2,2)

  Zo = np.dot(H, W2.T) + b2 #shape (2,1)
  y_pred = sigmoid(Zo) #shape (2,1)

  return Z, H, Zo, y_pred

In [10]:
#求损失函数loss

def compute_loss(y_pred, y_true):
  loss = (y_pred - y_true)**2 / 2
  return loss

In [17]:
#计算反向传播,求梯度
def backward(y_pred, y_true, H, W2, X):
  y_diff = y_pred - y_true #shape (2,1)

  dzo = y_diff * y_pred * (1-y_pred) #shape (2,1)
  b2_grad = np.mean(dzo, axis=0, keepdims=True) #shape (1,1)
  W2_grad = np.dot(dzo.T, H)

  dh = np.dot(dzo, W2) #shape (2,2)
  dz = dh*(1-dh) #shape (2,2)
  b1_grad = np.mean(dz, axis=0, keepdims=True).T #shape(2,1)
  W1_grad = np.dot(dz.T, X) #shape (2,2)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [20]:
#更新参数weight, bias

def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate):
  W1 -= learning_rate * W1_grad
  W2 -= learning_rate * W2_grad
  b1 -= learning_rate * b1_grad
  b2 -= learning_rate * b2_grad

  return W1, W2, b1, b2

In [28]:
X = np.array([[0.1, 0.2], [0.3, 0.4]])
y_true = np.array([[0], [1]])

W1 = np.array([[0.1, 0.2], [0.3, 0.4]]) #(输入层到隐藏层)
b1 = np.array([[0.1], [0.2]]) #(隐藏层偏置)
W2 = np.array([[0.5, 0.6]]) #(隐藏层到输出层)
b2 = np.array([[0.7]]) #(输出层偏置)

learning_rate = 0.1

epochs = 100

for epoch in range(epochs):
  Z, H, Zo, y_pred = forward(W1, W2, X, b1, b2)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, H, W2, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, learning_rate)

  print("Epoch:", epoch + 1,
    "loss:", np.round(np.mean(loss), 4),
    "W1_grad", np.round(W1_grad.flatten(), 2),
    "W2_grad", np.round(W2_grad.flatten(), 2),
    "b1_grad", np.round(b1_grad.flatten(), 2),
    "b2_grad", np.round(b2_grad.flatten(), 2),
    )

Epoch: 1 loss: 0.165 W1_grad [0.   0.01 0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.03] b2_grad [0.05]
Epoch: 2 loss: 0.1644 W1_grad [0.   0.01 0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.03] b2_grad [0.05]
Epoch: 3 loss: 0.1639 W1_grad [0.   0.01 0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.03] b2_grad [0.05]
Epoch: 4 loss: 0.1633 W1_grad [0.   0.01 0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.03] b2_grad [0.05]
Epoch: 5 loss: 0.1628 W1_grad [0.   0.01 0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.03] b2_grad [0.05]
Epoch: 6 loss: 0.1622 W1_grad [0.   0.   0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.03] b2_grad [0.05]
Epoch: 7 loss: 0.1617 W1_grad [0.   0.   0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.02] b2_grad [0.05]
Epoch: 8 loss: 0.1612 W1_grad [0.   0.   0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.02] b2_grad [0.05]
Epoch: 9 loss: 0.1606 W1_grad [0.   0.   0.   0.01] W2_grad [0.05 0.05] b1_grad [0.02 0.02] b2_grad [0.05]
Epoch: 10 loss: 0.1601 W1_grad [0.   0