# 神经网络

## 1. 反向传播神经网络模型

In [1]:
import numpy as np

### 1.1 读取数据

In [2]:
data_tr = np.loadtxt('../data/BPdata_tr.txt', skiprows=1, delimiter=',')
data_te = np.loadtxt('../data/BPdata_te.txt', skiprows=1, delimiter=',')

### 1.2 分离特征和标签

In [3]:
x_data = data_tr[:,:2]
y_data = data_tr[:,-1:]

In [4]:
x_data.shape

(500, 2)

In [5]:
y_data.shape

(500, 1)

### 1.3 确定网络结构

In [6]:
input_num = x_data.shape[1] # 输入层神经元的数量
hidden_num = 4
output_num = y_data.shape[1] # 输出层神经元的数量

### 1.4 参数的初始化

In [7]:
V = np.random.random(size=(input_num, hidden_num))
P = np.random.random(size=(hidden_num))
W = np.random.random(size=(hidden_num, output_num))
Q = np.random.random(size=(output_num))

### 1.5 前向计算（forward）

In [8]:
x = x_data[0]
y = y_data[0]

In [9]:
x.shape

(2,)

#### 定义sigmoid函数

In [10]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [11]:
alpha = x @ V - P
b = sigmoid(alpha)
beta = b @ W - Q
y_hat = sigmoid(beta)
y_hat

array([0.5686003])

### 1.6 反向传播（backward）

#### 定义4个容器用来存储相关参数的梯度

In [12]:
d_W = np.zeros_like(W)
d_Q = np.zeros_like(Q)
d_V = np.zeros_like(V)
d_P = np.zeros_like(P)

In [13]:
for j in range(output_num):
    for h in range(hidden_num):
        d_W[h,j] = (y_hat[j] - y[j]) * (y_hat[j]) * (1 - y_hat[j]) * b[h]
    d_Q[j] = (y_hat[j] - y[j]) * (y_hat[j]) * (1 - y_hat[j]) * (-1)

In [14]:
for j in range(output_num):
    for h in range(hidden_num):
        for i in range(input_num):
            d_V[i,h] = (y_hat[j] - y[j]) * (y_hat[j]) * (1 - y_hat[j]) * W[h,j] * b[h] * (1 - b[h]) * x[i]
        d_P[h] = (y_hat[j] - y[j]) * (y_hat[j]) * (1 - y_hat[j]) * W[h,j] * b[h] * (1 - b[h]) * (-1)

#### 定义学习率

In [15]:
yita = 0.5

#### 更新参数

In [16]:
W -= d_W * yita
Q -= d_Q * yita
V -= d_V * yita
P -= d_P * yita

### 1.7 批量梯度下降

In [17]:
def loss(y_true, y_pred):
    return 1/2 * np.sum((y_true - y_pred) ** 2)

In [18]:
V = np.random.random(size=(input_num, hidden_num))
P = np.random.random(size=(hidden_num))
W = np.random.random(size=(hidden_num, output_num))
Q = np.random.random(size=(output_num))

In [19]:
d_W = np.zeros_like(W)
d_Q = np.zeros_like(Q)
d_V = np.zeros_like(V)
d_P = np.zeros_like(P)

In [20]:
yita, epochs, batch_size = 0.1, 100, 8

In [21]:
index = np.arange(len(x_data))
np.random.shuffle(index)

for epoch in range(epochs):
    e = []
    if len(x_data) % batch_size == 0:
        batch_size_num = len(x_data) // batch_size
    else:
        batch_size_num = len(x_data) // batch_size + 1
    for bs in range(batch_size_num):
        x, y = x_data[bs * batch_size:(bs+1)*batch_size], y_data[bs * batch_size:(bs+1)*batch_size]
        alpha = x @ V - P
        b = sigmoid(alpha)
        beta = b @ W - Q
        y_hat = sigmoid(beta)
        error = loss(y, y_hat)
        e.append(error)
        # 反向计算
        for j in range(output_num):
            for h in range(hidden_num):
                d_W[h,j] = np.sum((y_hat[:,j] - y[:,j]) * (y_hat[:,j]) * (1 - y_hat[:,j]) * b[:,h])
            d_Q[j] = np.sum((y_hat[:,j] - y[:,j]) * (y_hat[:,j]) * (1 - y_hat[:,j]) * (-1))
        for j in range(output_num):
            for h in range(hidden_num):
                for i in range(input_num):
                    d_V[i,h] = np.sum((y_hat[:,j] - y[:,j]) * (y_hat[:,j]) * (1 - y_hat[:,j]) * W[h,j] * b[:,h] * (1 - b[:,h]) * x[:,i])
                d_P[h] = np.sum((y_hat[:,j] - y[:,j]) * (y_hat[:,j]) * (1 - y_hat[:,j]) * W[h,j] * b[:,h] * (1 - b[:,h]) * (-1))
        W -= d_W * yita
        Q -= d_Q * yita
        V -= d_V * yita
        P -= d_P * yita
    print(f"epoch:{epoch+1}, loss:{np.mean(e)}")

epoch:1, loss:0.20897768913762402
epoch:2, loss:0.1665053653218643
epoch:3, loss:0.163378607548636
epoch:4, loss:0.1601907801837857
epoch:5, loss:0.15687236600969273
epoch:6, loss:0.15336727664813282
epoch:7, loss:0.14962608566777286
epoch:8, loss:0.145606254890961
epoch:9, loss:0.14127293924022838
epoch:10, loss:0.13660030234036963
epoch:11, loss:0.13157325436736944
epoch:12, loss:0.12618944030729812
epoch:13, loss:0.1204611998531618
epoch:14, loss:0.11441711307754011
epoch:15, loss:0.10810267841399236
epoch:16, loss:0.10157968899048328
epoch:17, loss:0.0949240160152126
epoch:18, loss:0.08822177313686135
epoch:19, loss:0.08156417040385694
epoch:20, loss:0.07504167396939035
epoch:21, loss:0.06873826462328705
epoch:22, loss:0.06272657200546793
epoch:23, loss:0.057064460266161715
epoch:24, loss:0.051793328153928994
epoch:25, loss:0.04693806338686532
epoch:26, loss:0.042508343473345866
epoch:27, loss:0.038500845615316075
epoch:28, loss:0.034901914055629875
epoch:29, loss:0.031690301352486

### 1.8 模型测试

In [22]:
x_test = data_te[:,:2]
y_test = data_te[:,-1:]

In [23]:
alpha = x_test @ V - P
b = sigmoid(alpha)
beta = b @ W - Q
y_hat = sigmoid(beta)

In [24]:
y_hat[:10], y_test[:10]

(array([[0.50499818],
        [0.27073433],
        [0.1129674 ],
        [0.79762755],
        [0.6541421 ],
        [0.27288332],
        [0.24638933],
        [0.65178007],
        [0.23346635],
        [0.06465886]]),
 array([[0.46775308],
        [0.27956488],
        [0.1220108 ],
        [0.9160261 ],
        [0.62895224],
        [0.46066648],
        [0.25524112],
        [0.63238421],
        [0.19881207],
        [0.01403755]]))