## 使用 Python 和 Numpy 实现神经网络模型

### 读取并预处理数据


对应文章地址：https://ml.tdh6.top/archives/150

In [1]:
# 导入库
import numpy as np


In [233]:
raw_datas = np.fromfile('housing.txt', sep=' ')
raw_datas

array([6.320e-03, 1.800e+01, 2.310e+00, ..., 3.969e+02, 7.880e+00,
       1.190e+01])

In [237]:
# 特征处理
features = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE','DIS', 
                 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
raw_datas.resize([len(raw_datas) // len(features),len(features)])

In [238]:
raw_datas.shape

(506, 14)

In [240]:
raw_datas[0]

array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
       6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
       4.980e+00, 2.400e+01])

In [241]:
# 标准化
def norm(x):
    return (x - np.mean(x)) / (np.max(x) - np.min(x))
norm_raw_datas = norm(raw_datas)

In [284]:
# 抽取训练集和测试集

train_data = norm_raw_datas[:400]
test_data = norm_raw_datas[400:]

# 抽取预测标签
train_data = train_data[:,:-1]
test_data = test_data[:,:-1]
train_label = train_data[:,-1]
test_label = test_data[:,-1]


In [291]:
def loss(z, y):
    error = z - y
    num_samples = error.shape[0]
    cost = error * error
    cost = np.sum(cost) / num_samples
    return cost

In [297]:
def loss(pred,real):
    e = pred - real
    n = e.shape[0]
    c = e * e
    return np.sum(c)/n

In [292]:
# 初始化模型参数
x_in = train_data
x_mid = np.random.randn(13)
x_out = np.random.randn(13)
w_l1 = np.random.randn(13*13).reshape(13,13)
w_l2 = np.random.randn(13)
b_l1 = np.random.randn(13)
b_l2 = np.random.randn(1)

In [250]:
x_mid = np.dot(x_in,w_l1) + b_l1

In [251]:
x_out = np.dot(x_mid,w_l2) + b_l2

In [252]:
loss(x_out,train_label)

15.469914421039276

In [226]:
# 梯度计算

In [274]:
grad_l2 = np.mean((x_out - train_label).reshape(400,1) * x_mid,axis = 0)
w_l2 = w_l2 - (0.01 * w_l2)

In [275]:
grad_l1 = np.mean((x_out - train_label).reshape(400,1) * x_in,axis = 0)
w_l1 = w_l1 - (0.01 * w_l1)

In [276]:
grad_b2 = np.mean((x_out - train_label).reshape(400,1),axis = 0)
grad_b1 = np.mean((x_out - train_label).reshape(400,1),axis = 0)
b_l2 = b_l2 - (grad_b2 * eta)
b_l1 = b_l1 - (grad_b1 * eta)

In [298]:
x_mid = np.dot(x_in,w_l1) + b_l1
x_out = np.dot(x_mid,w_l2) + b_l2
loss(x_out,train_label)

10.38190503667864

In [230]:
# 损失计算
loss(x_out,train_label)

49.78964997141533

In [294]:
test_in = test_data
x_mid = np.dot(test_in,w_l1) + b_l1
x_out = np.dot(x_mid,w_l2) + b_l2
loss(x_out,test_label)

12.39999885900921

In [315]:
def loss(pred,real):
    e = pred - real
    n = e.shape[0]
    c = e * e
    return np.sum(c)/n

x_in = train_data
x_mid = np.random.randn(13)
x_out = np.random.randn(13)
w_l1 = np.random.randn(13*13).reshape(13,13)
w_l2 = np.random.randn(13)
b_l1 = np.random.randn(13)
b_l2 = np.random.randn(1)

def test_loss():
    test_in = test_data
    x_mid = np.dot(test_in,w_l1) + b_l1
    x_out = np.dot(x_mid,w_l2) + b_l2
    return loss(x_out,test_label)

max_loop = 500 # 最大循环次数
early_stop_wait = 5 # 当测试集损失不再下降几轮之后停止训练
not_min_count = 0 # 计数用变量
min_temp = 0 # 临时变量

for i in range(max_loop):
    # 计算前向计算结果
    x_mid = np.dot(x_in,w_l1) + b_l1
    x_out = np.dot(x_mid,w_l2) + b_l2
    
    # 计算梯度
    grad_l2 = np.mean((x_out - train_label).reshape(400,1) * x_mid,axis = 0)
    grad_l1 = np.mean((x_out - train_label).reshape(400,1) * x_in,axis = 0)
    grad_b2 = np.mean((x_out - train_label).reshape(400,1),axis = 0)
    grad_b1 = np.mean((x_out - train_label).reshape(400,1),axis = 0)
    
    # 权重调整
    w_l2 = w_l2 - (0.01 * w_l2)
    w_l1 = w_l1 - (0.01 * w_l1)
    b_l2 = b_l2 - (grad_b2 * eta)
    b_l1 = b_l1 - (grad_b1 * eta)
    
    # 验证模型
    nowloss = loss(x_out,train_label)
    
    # 验证模型并提前停止
    if(i == 0):
        min_temp = loss(x_out,train_label)
    elif (min_temp > nowloss):
        min_temp = nowloss
        not_min_count = 0
    else:
        not_min_count += 1
        if not_min_count > early_stop_wait:
            break;
    
    print("第" +  str(i)  + "轮训练，val_loss：" + str(nowloss)[:8] + "\tloss:" + str(loss(x_out,train_label))[:8] )



第0轮训练，val_loss：7.980471	loss:7.980471
第1轮训练，val_loss：7.033357	loss:7.033357
第2轮训练，val_loss：6.203279	loss:6.203279
第3轮训练，val_loss：5.475114	loss:5.475114
第4轮训练，val_loss：4.835781	loss:4.835781
第5轮训练，val_loss：4.273951	loss:4.273951
第6轮训练，val_loss：3.779804	loss:3.779804
第7轮训练，val_loss：3.344822	loss:3.344822
第8轮训练，val_loss：2.961604	loss:2.961604
第9轮训练，val_loss：2.623720	loss:2.623720
第10轮训练，val_loss：2.325572	loss:2.325572
第11轮训练，val_loss：2.062285	loss:2.062285
第12轮训练，val_loss：1.829610	loss:1.829610
第13轮训练，val_loss：1.623838	loss:1.623838
第14轮训练，val_loss：1.441733	loss:1.441733
第15轮训练，val_loss：1.280461	loss:1.280461
第16轮训练，val_loss：1.137547	loss:1.137547
第17轮训练，val_loss：1.010822	loss:1.010822
第18轮训练，val_loss：0.898384	loss:0.898384
第19轮训练，val_loss：0.798566	loss:0.798566
第20轮训练，val_loss：0.709904	loss:0.709904
第21轮训练，val_loss：0.631110	loss:0.631110
第22轮训练，val_loss：0.561054	loss:0.561054
第23轮训练，val_loss：0.498739	loss:0.498739
第24轮训练，val_loss：0.443289	loss:0.443289
第25轮训练，val_loss：0.393929	loss:0.393