In [6]:
#考虑一个具有两个隐藏层的神经网络，结构如下：

#一个输入节点 (x)
#第一个隐藏层，包含 2 个 神经元 (h_{1,1}, h_{1,2})，均使用 ReLU 激活函数。
#第二个隐藏层，包含 2 个 神经元 (h_{2,1}, h_{2,2})，均使用 ReLU 激活函数。
#一个输出节点 (y_{pred})，使用 Sigmoid 激活函数。

In [3]:
import numpy as np
import matplotlib.pyplot as plt

In [7]:
#计算relu函数
def relu(a):
  return np.maximum(0, a)

In [8]:
#计算sigmoid函数
def sigmoid(a):
  return 1 / (1 + np.exp(-a))

In [106]:
#正向传播，求出y_pred
def forward(w1, w2, w3, b1, b2, b3, x):
  z1 = np.outer(x, w1.T) + b1.T #shape(1,2)
  h1 = relu(z1) # shape(1,2)

  z2 = np.dot(h1, w2.T) + b2.T # shape (1, 2)
  h2 = relu(z2) # shape(1,2)

  zout = np.dot(h2, w3.T) + b3 #shape (2,1)
  y_pred = sigmoid(zout).flatten()
  return z1, h1, z2, h2, zout, y_pred

In [13]:
#计算损失函数loss
def compute_loss(y_pred, y_true):
  loss = (y_pred - y_true)**2 /2
  return loss

In [108]:
#反向传播，计算梯度
def backward(z1, h1, z2, h2, zout, y_pred, y_true, x):
  dypred = y_pred - y_true
  b3_grad = dypred * (1- y_pred**2) # shape(1,)
  w3_grad = np.dot(b3_grad, h2.T) # shape(1,2)

  dh2 = np.dot(w3.T, b3_grad) #shape (2,1)
  b2_grad = dh2 * (z2>0) #shape (2,1)
  w2_grad = np.dot(b2_grad, h1.T) #shape (2,2)

  dh1 = np.dot(w2.T, b2_grad) #shape (2,1)
  b1_grad = dh1 * (z1>0)
  w1_grad = np.dot(b1_grad, x) # shape (2,1)

  return w1_grad, w2_grad, w3_grad, b1_grad, b2_grad, b3_grad

In [49]:
#更新参数
def update_params(w1, w2, w3, b1, b2, b3, w1_grad, w2_grad, w3_grad, b1_grad, b2_grad, b3_grad, learning_rate):
  w1 -= learning_rate * w1_grad
  w2 -= learning_rate * w2_grad
  w3 -= learning_rate * w3_grad
  b1 -= learning_rate * b1_grad
  b2 -= learning_rate * b2_grad
  b3 -= learning_rate * b3_grad
  return w1, w2, w3, b1, b2, b3

In [109]:
#参数初始化
w1 = np.array([[0.2],
        [-0.3]]) #shape (2,1)

b1 = np.array([[0.1],
        [-0.1]]) #shape (2,1)

w2 = np.array([[0.4, -0.1],
        [0.2, 0.5]]) #shape (2,2)

b2 = np.array([[-0.2],
        [0.3]]) # shape (2,1)

w3 = np.array([[-0.6, 0.7]]) #shape (1,2)

b3 = np.array([[0.1]]) #shape (1,1)

learning_rate = 0.01

x = np.array([1.5])
y_true = np.array([0.6])

epochs = 1000
losses = []

for epoch in range(epochs):
  z1, h1, z2, h2, zout, y_pred = forward(w1, w2, w3, b1, b2, b3, x)
  loss = compute_loss(y_pred, y_true)
  losses.append(loss)

  w1_grad, w2_grad, w3_grad, b1_grad, b2_grad, b3_grad = backward(z1, h1, z2, h2, zout, y_pred, y_true, x)
  w1, w2, w3, b1, b2, b3 = update_params(w1, w2, w3, b1, b2, b3, w1_grad, w2_grad, w3_grad, b1_grad, b2_grad, b3_grad, learning_rate)

  if epoch%10 == 0:
    print("Epoch:", epoch + 1,
      f"loss: {loss:.4f}",
      f"y_pred: {np.round(y_pred, 4)}",
      f"w1_grad: {np.round(w1_grad.flatten(), 2)}",
      f"w2_grad: {np.round(w2_grad.flatten(), 2)}",
      f"w3_grad: {np.round(w3_grad.flatten(), 2)}",
      f"b1_grad: {np.round(b1_grad.flatten(), 2)}",
      f"b2_grad: {np.round(b2_grad.flatten(), 2)}",
      f"b3_grad: {np.round(b3_grad, 2)}"
      )

plt.plot(range(epochs), losses, "blue")
plt.xlabel("epoch axis")
plt.ylabel("loss axis")
plt.show()


ValueError: shapes (1,) and (2,1) not aligned: 1 (dim 0) != 2 (dim 0)