In [1]:
#神经网络的计算方式
#第一层
#神经元1:z1 = w1*x1 + w2*x2 + ... + w12*x12 + b
#神经元2:z2 = 同上
#神经元3:z3 = 同上

#神经元1:a1 = a(z1)
#神经元2:a2 = 同上
#神经元3:a3 = 同上

#第二层
#神经元1:z1 = w1*a1 + w2*a2 + w3*a3 + b
#神经元2:z2 = 同上

#神经元1:a1 = a(z1)
#神经元2:a2 = 同上

#误差函数
#C = 1/2 * [(y1 - a1)^2 + (y2 - a2)^2] <- 对所有x求和

#直接对所有变量求导是很复杂的,比如要对第一层神经元1的w1求导
#dC/dw1(第一层神经元1) = dC/da * da/dz * dz/da * (进入第一层)da/dz * dz/dw + (第二层神经元2同样的式子再来一遍)
#还要对上面这个式子,以所有的x求和

#定义符号delta = dC/dz
#因为 z = w1*x1 + ... + w12*x12 + b
#所以 dz/dw = x
#所以 dz/db = 1

#因为 dC/dw = dC/dz * dz/dw
#所以 dC/dw = delta * x

#因为 dC/db = dC/dz * dz/db
#所以 dC/db = delta

#可以直接求出第二层的delta
#delta = dC/dz
#delta = dC/da * da/dz
#因为 C = 1/2 * [(y1 - a1)^2 + (y2 - a2)^2] <- 对所有x求和
#所以 dC/da = a - y (dC/da1 = a1 - y1 , dC/da2 = a2 - y2)
#所以 delta = (a - y) * da/dz
#sigmoid函数求导
#da/dz = a(z) * (1 - a(z))
#所以 delta = (a - y) * a(z) * (1 - a(z))

#求第一层的delta
#delta = dC/dz(第二层) * dz/da * da/dz + dC/dz(第二层第二个神经元) * dz/da * da/dz
#因为 dC/dz(第二层) = delta(第二层)
#因为 dz/da = w (这里的z是第二层的,a是第一层的,这个w也是第二层的)
#因为 da/dz = a(z) * (1 - a(z))
#所以 delta = delta(第二层) * w(第二层) * [a(z) * (1 - a(z))] + (第二层第二个神经元同样的式子)
#所以 delta = [delta(第二层) * w(第二层) + delta(第二层第二个) * w(第二层第二个)] * [a(z) * (1 - a(z))]

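#At this point a general formula for delta can be stated (this is backpropagation: errors flow from the output towards the input)
#delta(this layer) = [delta(next layer) * w(next layer) <- summed over all neurons of the next layer, i.e. the layer closer to the output] * [a(z) * (1 - a(z))]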

#求梯度
#dC/dw = delta * x <- 对所有x求和
#dC/db = delta <- 对所有x求和

In [2]:
import numpy as np


#加载数据
def load_data(path='数字01.txt'):
    """Load the 12-feature samples and one-hot encode their labels.

    Every non-blank line of the file must hold at least 13 comma-separated
    fields: 12 integer features followed by a label. A label equal to
    ``'0'`` is encoded as ``[1, 0]``; any other label is encoded as
    ``[0, 1]``.

    Args:
        path: File to read. Defaults to the data file used by this notebook,
            so existing ``load_data()`` calls keep working.

    Returns:
        Tuple ``(x, y)`` of int arrays with shapes ``(n, 12)`` and ``(n, 2)``,
        where ``n`` is the number of non-blank lines.

    Raises:
        ValueError: If a non-blank line has fewer than 13 fields or a feature
            is not an integer.
    """
    with open(path) as fr:
        # Blank lines (typically a trailing newline at the end of the file)
        # carry no sample; skipping them avoids a shape error further down.
        rows = [line.strip().split(',') for line in fr if line.strip()]

    x = np.empty((len(rows), 12), dtype=int)
    y = np.empty((len(rows), 2), dtype=int)

    for i, fields in enumerate(rows):
        if len(fields) < 13:
            raise ValueError(
                'record %d has %d fields, expected at least 13' %
                (i, len(fields)))

        # Convert explicitly instead of relying on NumPy's implicit
        # string-to-int cast during assignment.
        x[i] = [int(value) for value in fields[:12]]
        y[i] = [1, 0] if fields[12].strip() == '0' else [0, 1]

    return x, y


# Load the dataset into the module-level arrays x (features) and y (one-hot
# labels); every later cell reads these globals directly.
x, y = load_data()
# Preview the first five rows of each array together with both shapes.
x[:5], y[:5], x.shape, y.shape

(array([[1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1],
        [0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1],
        [1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1],
        [1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0],
        [1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1]]),
 array([[1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0]]),
 (64, 12),
 (64, 2))

In [3]:
#定义常量
# N is the number of samples (rows of x) and M the number of features per
# sample (columns of x).
N, M = x.shape

# Learning rate: update() multiplies every gradient by this before subtracting.
lr = 0.2

In [4]:
#定义神经元对象
class Neural:
    """A single neuron with a sigmoid activation.

    Attributes:
        w: Weights, multiplied element-wise with the input in ``run``. The
            object passed to the constructor is stored as-is (not copied).
        b: Bias added to the weighted sum.
        z: Weighted input from the most recent ``run`` call, or ``None`` if
            ``run`` has not been called yet.
        a: Sigmoid activation from the most recent ``run`` call, or ``None``
            if ``run`` has not been called yet.
    """

    def __init__(self, w, b):
        self.w = w
        self.b = b

        # Define the result attributes up front so that reading them before
        # the first run() yields None instead of raising AttributeError; this
        # mirrors how Layer initialises its `out` attribute.
        self.z = None
        self.a = None

    def run(self, xi):
        """Compute and store ``z`` and ``a`` for the input ``xi``.

        Args:
            xi: Input values, multiplied element-wise with ``self.w``.

        Returns:
            None. The results are stored on ``self.z`` and ``self.a``.
        """
        # Linear part: z = sum(w * x) + b
        self.z = np.multiply(xi, self.w).sum() + self.b

        # Activation: sigmoid(z)
        self.a = 1 / (1 + np.exp(-self.z))

In [5]:
#定义网络层对象
class Layer:
    """A group of neurons that all receive the same input.

    Attributes:
        ns: The neuron objects making up this layer.
        out: Array holding each neuron's activation ``a`` after the most
            recent ``run`` call, in the same order as ``ns``; ``None`` before
            the first call.
    """

    def __init__(self, ns):
        self.out = None
        self.ns = ns

    def run(self, xi):
        """Run every neuron on ``xi`` and store their activations in ``out``.

        Args:
            xi: Input handed unchanged to each neuron's ``run`` method.
        """
        # Phase 1: let each neuron compute its own activation.
        for neuron in self.ns:
            neuron.run(xi)

        # Phase 2: gather the activations into one array, in neuron order.
        self.out = np.array([neuron.a for neuron in self.ns])

In [6]:
#定义第一层神经网络
# First (hidden) layer: three neurons with 12 weights each, listed as
# (weights, bias) pairs in neuron order.
hidden_params = (
    ([
        0.490, 0.348, 0.073, 0.837, -0.071, -3.617, -0.536, -0.023, -1.717,
        -1.456, -0.556, 0.852
    ], -0.185),
    ([
        0.442, -0.537, 1.008, 1.072, -0.733, 0.823, -0.453, -0.014, -0.027,
        -0.427, 1.876, -2.305
    ], 0.526),
    ([
        0.654, -1.389, 1.246, 0.057, -0.183, -0.743, -0.461, 0.331, 0.449,
        -1.296, 1.569, -0.471
    ], -1.169),
)

ns = []
for weights, bias in hidden_params:
    w = np.array(weights)
    ns.append(Neural(w, b=bias))

layer_1 = Layer(ns)

# Run the first layer on the first sample and show its three activations.
layer_1.run(x[0])

layer_1.out

array([0.00420612, 0.88058693, 0.17450929])

In [7]:
#定义第二层神经网络
# Second (output) layer: two neurons with 3 weights each (one per hidden
# neuron), listed as (weights, bias) pairs in neuron order.
output_params = (
    ([0.388, 0.803, 0.029], -1.438),
    ([0.025, -0.790, 1.553], -1.379),
)

ns = []
for weights, bias in output_params:
    w = np.array(weights)
    ns.append(Neural(w, b=bias))

layer_2 = Layer(ns)

# Feed the first layer's current output into the second layer and show the
# two resulting activations.
layer_2.run(layer_1.out)

layer_2.out

array([0.32646968, 0.14142001])

In [8]:
#先计算所有x的输出矩阵
def get_out():
    """Forward every sample through both layers and collect the activations.

    Reads the globals ``N``, ``x``, ``layer_1`` and ``layer_2``. As a side
    effect both layers are left holding the results for the last sample.

    Returns:
        Tuple ``(out_1, out_2)``: arrays of shape ``(N, 3)`` and ``(N, 2)``
        with the first-layer and second-layer outputs for each sample.
    """
    hidden_out = np.zeros((N, 3))
    final_out = np.zeros((N, 2))

    for idx in range(N):
        # The second layer consumes what the first layer just produced.
        layer_1.run(x[idx])
        layer_2.run(layer_1.out)

        hidden_out[idx] = layer_1.out
        final_out[idx] = layer_2.out

    return hidden_out, final_out


# Cache the activations of both layers for all samples in module-level arrays;
# the delta and gradient functions below read these globals.
out_1, out_2 = get_out()
# Preview the first five rows of each.
out_1[:5], out_2[:5]

(array([[0.00420612, 0.88058693, 0.17450929],
        [0.00258098, 0.82577863, 0.09903438],
        [0.00391117, 0.72908792, 0.05732418],
        [0.0017985 , 0.98665265, 0.25293871],
        [0.01779295, 0.91871392, 0.43585542]]),
 array([[0.32646968, 0.14142001],
        [0.31625713, 0.13267627],
        [0.29956661, 0.1340218 ],
        [0.34577006, 0.14609076],
        [0.33609856, 0.19349542]]))

In [9]:
#sigmoid函数求导 根据公式 da/dz = a(z) * (1 - a(z))
def da(a):
    """Return the sigmoid derivative expressed through the activation.

    Args:
        a: The sigmoid output ``a(z)`` itself, not the pre-activation ``z``.

    Returns:
        ``a * (1 - a)``.
    """
    complement = 1 - a
    return a * complement


# Quick arithmetic check of the formula: 3 * (1 - 3) = -6.
da(3)

-6

In [10]:
#可以直接求出第二层的delta
#delta = dC/dz
#delta = dC/da * da/dz
#因为 C = 1/2 * [(y1 - a1)^2 + (y2 - a2)^2] <- 对所有x求和
#所以 dC/da = a - y (dC/da1 = a1 - y1 , dC/da2 = a2 - y2)
#所以 delta = (a - y) * da/dz
#sigmoid函数求导
#da/dz = a(z) * (1 - a(z))
#所以 delta = (a - y) * a(z) * (1 - a(z))
def get_delta_2():
    """Compute the second-layer delta (dC/dz) for every sample.

    Reads the globals ``N``, ``out_2`` and ``y`` and applies
    ``delta = (a - y) * a * (1 - a)`` to each of the two outputs.

    Returns:
        Array of shape ``(N, 2)``.
    """
    deltas = np.zeros((N, 2))
    for i in range(N):
        for k in range(2):
            activation = out_2[i, k]
            deltas[i, k] = (activation - y[i, k]) * da(activation)
    return deltas


# Store the second-layer deltas globally; get_delta_1() and get_gradient_2()
# read this array.
delta_2 = get_delta_2()
# Preview the first five rows.
delta_2[:5]

array([[-0.14810072,  0.01717127],
       [-0.14785157,  0.01526749],
       [-0.14696946,  0.01555457],
       [-0.1479954 ,  0.01822457],
       [-0.14814052,  0.03019592]])

In [11]:
#求第一层的delta
#delta = dC/dz(第二层) * dz/da * da/dz + dC/dz(第二层第二个神经元) * dz/da * da/dz
#因为 dC/dz(第二层) = delta(第二层)
#因为 dz/da = w (这里的z是第二层的,a是第一层的,这个w也是第二层的)
#因为 da/dz = a(z) * (1 - a(z))
#所以 delta = delta(第二层) * w(第二层) * [a(z) * (1 - a(z))] + (第二层第二个神经元同样的式子)
#所以 delta = [delta(第二层) * w(第二层) + delta(第二层第二个) * w(第二层第二个)] * [a(z) * (1 - a(z))]
def get_delta_1():
    """Compute the first-layer delta (dC/dz) for every sample.

    Reads the globals ``N``, ``delta_2``, ``layer_2`` and ``out_1``. For each
    of the three first-layer neurons, the two second-layer deltas are weighted
    by the second-layer weights attached to that neuron, summed, and
    multiplied by the sigmoid derivative of the neuron's own activation.

    Returns:
        Array of shape ``(N, 3)``.
    """
    first_out, second_out = layer_2.ns[0], layer_2.ns[1]

    deltas = np.zeros((N, 3))
    for i in range(N):
        for j in range(3):
            # Error arriving at hidden neuron j from both output neurons.
            incoming = (delta_2[i, 0] * first_out.w[j] +
                        delta_2[i, 1] * second_out.w[j])
            deltas[i, j] = incoming * da(out_1[i, j])

    return deltas


# Store the first-layer deltas globally; get_gradient_1() reads this array.
delta_1 = get_delta_1()
# Preview the first five rows.
delta_1[:5]

array([[-0.00023888, -0.01393182,  0.00322283],
       [-0.0001467 , -0.01881598,  0.00173302],
       [-0.00022064, -0.02573759,  0.00107504],
       [-0.00010227, -0.00175463,  0.00453711],
       [-0.00099132, -0.01066498,  0.01047427]])

In [12]:
#求梯度
#dC/dw = delta * x <- 对所有x求和
#dC/db = delta <- 对所有x求和
def get_gradient_1():
    """Accumulate the first-layer gradients over all samples.

    Reads the globals ``N``, ``x`` and ``delta_1``. For each sample the weight
    gradient of a neuron is ``delta * x`` and the bias gradient is ``delta``;
    both are summed over the samples.

    Returns:
        Tuple ``(gradient_w, gradient_b)`` with shapes ``(3, 12)`` and
        ``(3,)``.
    """
    grad_w = np.zeros((3, 12))
    grad_b = np.zeros(3)

    for i in range(N):
        sample = x[i]
        sample_delta = delta_1[i]

        for k in range(3):
            grad_w[k] += sample * sample_delta[k]

        grad_b += sample_delta

    return grad_w, grad_b


# Store the first-layer gradients globally; update() reads both names.
gradient_w_1, gradient_b_1 = get_gradient_1()
# Display the full weight-gradient matrix and bias-gradient vector.
gradient_w_1, gradient_b_1

(array([[ 4.05233159e-02,  6.83227585e-02, -2.23664065e-02,
         -1.46557740e-02,  1.03294660e-01, -1.32018900e-02,
         -1.38212794e-02,  9.30558590e-02, -2.18880163e-02,
         -3.32488398e-05,  8.05971291e-02, -1.08854079e-02],
        [-1.88945094e-02,  1.92699466e-01, -2.94810051e-01,
         -4.80701480e-01,  5.88722435e-01, -3.94014844e-01,
         -5.33813037e-01,  6.44996571e-01, -4.12351372e-01,
         -2.86739148e-01,  1.86757266e-01, -3.95883564e-01],
        [-4.91294388e-01, -7.93739379e-01,  3.75313285e-02,
          1.64883495e-02, -9.59411464e-01, -8.56413832e-02,
          1.59010319e-02, -9.22133439e-01, -1.28516297e-01,
         -1.16847282e-01, -8.89089583e-01, -1.62704720e-01]]),
 array([ 0.08252767,  0.1210072 , -0.93234667]))

In [13]:
#求梯度
#dC/dw = delta * x <- 对所有x求和
#dC/db = delta <- 对所有x求和
def get_gradient_2():
    """Accumulate the second-layer gradients over all samples.

    Reads the globals ``N``, ``out_1`` and ``delta_2``. The input of the
    second layer is the first layer's output, so the weight gradient of a
    neuron is ``delta * out_1``; the bias gradient is ``delta``. Both are
    summed over the samples.

    Returns:
        Tuple ``(gradient_w, gradient_b)`` with shapes ``(2, 3)`` and ``(2,)``.
    """
    grad_w = np.zeros((2, 3))
    grad_b = np.zeros(2)

    for i in range(N):
        layer_input = out_1[i]
        sample_delta = delta_2[i]

        for k in range(2):
            grad_w[k] += layer_input * sample_delta[k]

        grad_b += sample_delta

    return grad_w, grad_b


# Store the second-layer gradients globally; update() reads both names.
gradient_w_2, gradient_b_2 = get_gradient_2()
# Display the full weight-gradient matrix and bias-gradient vector.
gradient_w_2, gradient_b_2

(array([[ 0.54097704, -1.9398653 , -0.13472918],
        [-1.15723532, -2.10601752, -1.02770687]]),
 array([-2.49194683, -3.2636605 ]))

In [14]:
#更新参数
def update():
    """Apply one gradient-descent step to every neuron of both layers.

    Reads the globals ``layer_1``, ``layer_2``, ``gradient_w_1``,
    ``gradient_b_1``, ``gradient_w_2``, ``gradient_b_2`` and ``lr``. Each
    neuron's ``w`` and ``b`` are reduced by the matching gradient times
    ``lr``; the weight arrays are modified in place.
    """
    # (layer, weight gradients, bias gradients, number of neurons to update)
    steps = (
        (layer_1, gradient_w_1, gradient_b_1, 3),
        (layer_2, gradient_w_2, gradient_b_2, 2),
    )

    for layer, grad_w, grad_b, count in steps:
        for k in range(count):
            neuron = layer.ns[k]
            neuron.w -= grad_w[k] * lr
            neuron.b -= grad_b[k] * lr


# Apply one gradient step using the gradients computed in the cells above.
# NOTE: this mutates the neurons in place, so re-running this cell applies
# the same gradients a second time.
update()

# Show the updated parameters of the first neuron in each layer.
layer_1.ns[0].w, layer_1.ns[0].b, layer_2.ns[0].w, layer_2.ns[0].b

(array([ 0.48189534,  0.33433545,  0.07747328,  0.83993115, -0.09165893,
        -3.61435962, -0.53323574, -0.04161117, -1.7126224 , -1.45599335,
        -0.57211943,  0.85417708]),
 -0.2015055348055877,
 array([0.27980459, 1.19097306, 0.05594584]),
 -0.9396106341633224)

In [15]:
#批量训练
# Batch training: repeat the forward pass, both delta computations, both
# gradient computations and the parameter update 49 more times. The cells
# above already performed this sequence once by hand.
# Statement order matters: each call reads the globals rebound by the
# statements before it.
for _ in range(49):
    # Forward pass with the current parameters.
    out_1, out_2 = get_out()
    
    # Output-layer deltas first; the first-layer deltas depend on them.
    delta_2 = get_delta_2()
    delta_1 = get_delta_1()
    
    gradient_w_1, gradient_b_1 = get_gradient_1()
    gradient_w_2, gradient_b_2 = get_gradient_2()
    
    # NOTE: out_1/out_2 are not recomputed after this call, so once the loop
    # ends they still describe the parameters from before the last update.
    update()
    
# Show the trained parameters of the first neuron in each layer.
layer_1.ns[0].w, layer_1.ns[0].b, layer_2.ns[0].w, layer_2.ns[0].b

(array([ 0.43966452,  0.79168649, -0.11471806,  0.85896993,  0.30173189,
        -3.69998133, -0.48450415,  0.31631447, -1.93983601, -1.43356086,
        -0.11891537,  0.6508831 ]),
 0.25076062113669856,
 array([-1.31244417,  3.5897345 , -3.05645996]),
 -0.32577782301713837)

In [16]:
#测试
# Accuracy on the training samples.
# Run a fresh forward pass first: the cached `out_2` was computed before the
# most recent update() call, so scoring it would evaluate parameters that are
# one step behind the ones the network now holds.
final_out = get_out()[1]

correct = 0
for i in range(N):
    # The predicted class is the index of the larger output activation; the
    # true class is the index of the 1 in the one-hot label.
    pred = final_out[i].argmax()

    if pred == y[i].argmax():
        correct += 1

# Fraction of samples classified correctly.
correct / N

1.0