In [1]:
#假设有一个简单的神经网络，
#包含一个输入层（2个神经元）
#一个隐藏层（3个神经元，使用 ReLU 激活函数）
#一个输出层（1个神经元，使用线性激活函数）。

#我们用均方误差（Mean Squared Error, MSE）作为损失函数。

In [2]:
import numpy as np

In [3]:
#求relu激活函数
def relu(a):
  return np.maximum(0, a)

In [11]:
#前向传播，求输出y_pred

def forward(W1, W2, b1, b2, X):
  Z = np.outer(X, W2) + b1 #shape (2,3)
  H = np.mean(relu(Z), axis=0, keepdims=True).flatten() #shape (3,)

  y_temp = np.mean((np.outer(W2, H) + b2), axis=0, keepdims=True).flatten()

  y_pred = np.mean(y_temp)
  return Z, H, y_pred

In [14]:
#求损失函数, loss值
def compute_loss(y_pred, y_true):
  loss = (y_pred - y_true)**2 / 2
  return loss

In [18]:
#反向传播，求梯度函数
def backward(y_pred, y_true, H, Z, W2, X):
  b2_grad = y_pred - y_true
  W2_grad = b2_grad * H

  Z = np.mean(Z, axis=0, keepdims=True).flatten()
  b1_grad = b2_grad * W2 * (Z>0)
  W1_grad = np.outer(X, b1_grad)

  return W1_grad, W2_grad, b1_grad, b2_grad

In [20]:
#更新参数，weight,bias
def update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, lr):
  W1 -= lr * W1_grad
  W2 -= lr * W2_grad
  b1 -= lr * b1_grad
  b2 -= lr * b2_grad
  return W1, W2, b1, b2

In [23]:
X = np.array([1.0, 2.0]) #ndarray(2,)（一个样本，2个特征）

y_true = 3.0

W1 = np.array([[0.5, 0.1, -0.3],
        [0.2, 0.4, 0.6]]) #ndarray(2,3)

b1 = np.array([0.1, 0.2, 0.3]) #ndarray(3,)

W2 = np.array([0.3, 0.7, -0.5]) #ndarray(3,) （形状为 3×1）

lr = 0.01

b2 = 0.1

epochs = 50

for epoch in range(epochs):
  Z, H, y_pred = forward(W1, W2, b1, b2, X)
  loss = compute_loss(y_pred, y_true)
  W1_grad, W2_grad, b1_grad, b2_grad = backward(y_pred, y_true, H, Z, W2, X)
  W1, W2, b1, b2 = update_params(W1, W2, b1, b2, W1_grad, W2_grad, b1_grad, b2_grad, lr)

  print("Epoch:", epoch + 1,
    "loss:", np.round(loss, 4),
    "W1_grad", np.round(W1_grad.flatten(), 2),
    "W2_grad", np.round(W2_grad.flatten(), 2),
    "b1_grad", np.round(b1_grad.flatten(), 2),
    "b2_grad", np.round(b2_grad.flatten(), 2)
    )

Epoch: 1 loss: 3.92 W1_grad [-0.84 -1.96  0.   -1.68 -3.92  0.  ] W2_grad [-1.54 -3.5  -0.  ] b1_grad [-0.84 -1.96  0.  ] b2_grad [-2.8]
Epoch: 2 loss: 3.7966 W1_grad [-0.87 -2.03  0.   -1.74 -4.05  0.  ] W2_grad [-1.6  -3.64 -0.  ] b1_grad [-0.87 -2.03  0.  ] b2_grad [-2.76]
Epoch: 3 loss: 3.6713 W1_grad [-0.9  -2.09  0.   -1.8  -4.18  0.  ] W2_grad [-1.66 -3.79 -0.  ] b1_grad [-0.9  -2.09  0.  ] b2_grad [-2.71]
Epoch: 4 loss: 3.5439 W1_grad [-0.93 -2.15  0.   -1.85 -4.31  0.  ] W2_grad [-1.73 -3.93 -0.  ] b1_grad [-0.93 -2.15  0.  ] b2_grad [-2.66]
Epoch: 5 loss: 3.4143 W1_grad [-0.95 -2.22  0.   -1.91 -4.43  0.  ] W2_grad [-1.79 -4.06 -0.  ] b1_grad [-0.95 -2.22  0.  ] b2_grad [-2.61]
Epoch: 6 loss: 3.2825 W1_grad [-0.98 -2.28  0.   -1.96 -4.56  0.  ] W2_grad [-1.84 -4.2  -0.  ] b1_grad [-0.98 -2.28  0.  ] b2_grad [-2.56]
Epoch: 7 loss: 3.1485 W1_grad [-1.01 -2.34  0.   -2.02 -4.67  0.  ] W2_grad [-1.9  -4.33 -0.  ] b1_grad [-1.01 -2.34  0.  ] b2_grad [-2.51]
Epoch: 8 loss: 3.0123 W