In [1]:
import numpy as np


#Load the data set
def load_data(path='数字123.txt'):
    """Read the digit samples from a comma-separated text file.

    Every non-blank line must hold at least 37 comma-separated fields: the
    first 36 are the feature values of one sample and the 37th is its integer
    class label.

    Parameters
    ----------
    path : str, optional
        File to read. Defaults to the data file used by this notebook.

    Returns
    -------
    x : numpy.ndarray, shape (n_samples, 36), dtype float
        Feature matrix, one row per sample.
    y_one_hot : numpy.ndarray, shape (n_samples, 3), dtype int
        One-hot labels: label 1 sets column 0, label 2 column 1 and label 3
        column 2. A row whose label is none of these stays all zeros.

    Raises
    ------
    ValueError
        If a field cannot be converted to a number, or a line has fewer than
        36 feature fields.
    IndexError
        If a line has no 37th (label) field.
    """
    with open(path) as fr:
        #Skip blank lines (typically an extra newline at the end of the file);
        #they carry no sample and would otherwise fail numeric conversion
        rows = [line.strip().split(',') for line in fr if line.strip()]

    x = np.empty((len(rows), 36), dtype=float)
    y = np.empty(len(rows), dtype=int)

    for i, fields in enumerate(rows):
        x[i] = fields[:36]
        y[i] = fields[36]

    #Convert the integer labels to one-hot encoding
    y_one_hot = np.zeros((len(rows), 3), dtype=int)
    for column, label in enumerate((1, 2, 3)):
        y_one_hot[y == label, column] = 1

    return x, y_one_hot


#Load the feature matrix and the one-hot labels, then preview the first five rows of each
x, y = load_data()
x[:5], y[:5]

(array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 1., 0., 0.],
        [0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         1., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         1., 1., 1., 0.]]),
 array([[1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0]]))

In [2]:
#Constants
#N: number of samples, M: number of input features per sample
N, M = x.shape

#Seed NumPy's global RNG so the randomly initialised weights (and therefore
#the losses and accuracy computed from them) are identical on every
#Restart & Run All
SEED = 0
np.random.seed(SEED)

In [3]:
#A single neuron
class Neural:
    """One sigmoid neuron holding a weight vector and a bias.

    `w` and `b` are stored as given (no copy is made), so code that mutates
    `n.w` in place also mutates the array that was passed in.
    """

    def __init__(self, w, b):
        self.b = b
        self.w = w

    def run(self, xi):
        """Forward one input vector through the neuron.

        Stores the pre-activation in `self.z` and the sigmoid activation in
        `self.a`; nothing is returned.
        """
        #Linear part: weighted sum of the inputs plus the bias
        weighted = np.multiply(xi, self.w)
        self.z = weighted.sum() + self.b

        #Activation function: sigmoid
        self.a = 1 / (np.exp(-self.z) + 1)

In [4]:
#A network layer
class Layer:
    """A list of neurons that are all fed the same input vector.

    `out` is a float array with one slot per neuron. It is allocated once in
    the constructor and overwritten in place by every call to `run`.
    """

    def __init__(self, ns):
        #List of neurons making up this layer
        self.ns = ns
        self.out = np.empty(len(ns))

    #Run the neurons and record their results
    def run(self, xi):
        """Forward `xi` through every neuron, then copy the activations to `out`."""
        #First pass: evaluate every neuron on the same input
        for neuron in self.ns:
            neuron.run(xi)

        #Second pass: collect the activations, in neuron order
        for slot, neuron in enumerate(self.ns):
            self.out[slot] = neuron.a

In [5]:
#Build the first layer of the network: 6 neurons with random weights and bias.
#Each neuron needs one weight per input feature, so the weight count is taken
#from M (= x.shape[1]) rather than repeating the literal 36.
ns = []
for i in range(6):
    ns.append(Neural(np.random.randn(M), b=np.random.randn(1)[0]))
layer_1 = Layer(ns)

#Run the first layer on the first sample
layer_1.run(x[0])

layer_1.out

array([0.70977385, 0.4577683 , 0.05793808, 0.64739933, 0.81131448,
       0.82838851])

In [6]:
#Build the second layer of the network.
#Its neurons consume layer_1.out, so each needs one weight per first-layer
#neuron, and there is one output neuron per one-hot label column. Both sizes
#are derived instead of hard-coded so they cannot drift out of sync.
ns = []
for i in range(y.shape[1]):
    ns.append(Neural(np.random.randn(len(layer_1.ns)), b=np.random.randn(1)[0]))
layer_2 = Layer(ns)

#Run the second layer on the first layer's output
layer_2.run(layer_1.out)

layer_2.out

array([0.36844697, 0.5742036 , 0.95180042])

In [7]:
#Compute the loss
def get_loss(x, y):
    """Forward one sample through both layers and return its squared error.

    Runs `layer_1` on `x`, feeds `layer_1.out` to `layer_2`, and returns the
    sum of squared differences between `layer_2.out` and the target `y`.
    Both layers' stored activations are overwritten as a side effect.
    """
    layer_1.run(x)
    layer_2.run(layer_1.out)
    residual = layer_2.out - y
    return np.sum(np.square(residual))


def total_loss():
    """Return the sum of `get_loss` over the first N samples of the global x, y."""
    accumulated = 0
    for idx in range(N):
        accumulated = accumulated + get_loss(x[idx], y[idx])
    return accumulated


#Show the loss of the first sample alone and the loss summed over all N samples
get_loss(x[0], y[0]), total_loss()

(1.634493038926888, 120.85891829051137)

In [8]:
#Brute-force (numerical) gradient
def get_gradient(n, x, y, upsilon=1e-5):
    """Estimate the loss gradient w.r.t. one neuron's parameters by forward differences.

    Each weight of `n`, and then its bias, is nudged by `upsilon`, the loss is
    re-evaluated with `get_loss(x, y)`, and the slope
    `(perturbed_loss - base_loss) / upsilon` is recorded.

    Parameters
    ----------
    n : neuron object with a mutable weight array `w` and a bias `b`
    x, y : the sample and its target, passed unchanged to `get_loss`
    upsilon : float, optional
        Size of the perturbation (finite-difference step). Defaults to 1e-5.

    Returns
    -------
    grad_w : numpy.ndarray, shape (len(n.w),)
    grad_b : float-like

    Notes
    -----
    Every parameter is restored from a saved copy rather than by subtracting
    `upsilon` again: `p + upsilon - upsilon` is not exactly `p` in floating
    point, so the subtract-back approach slowly corrupts the weights. The
    restore happens in a `finally` block, so `n` is left untouched even if
    `get_loss` raises.
    """
    #Loss before any perturbation
    loss = get_loss(x, y)

    #Gradient w.r.t. each weight
    grad_w = np.empty(len(n.w))
    for i in range(len(n.w)):
        saved = n.w[i]
        n.w[i] = saved + upsilon
        try:
            loss2 = get_loss(x, y)
        finally:
            n.w[i] = saved
        grad_w[i] = (loss2 - loss) / upsilon

    #Gradient w.r.t. the bias
    saved = n.b
    n.b = saved + upsilon
    try:
        loss2 = get_loss(x, y)
    finally:
        n.b = saved
    grad_b = (loss2 - loss) / upsilon

    return grad_w, grad_b


#Example: numerical gradient of the first sample's loss w.r.t. the first neuron of layer 1
get_gradient(layer_1.ns[0], x[0], y[0])

(array([ 0.        ,  0.        ,  0.        , -0.03547165,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        , -0.03547165,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        -0.03547165,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        , -0.03547165,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        , -0.03547165,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        , -0.03547165,  0.        ,
         0.        ]),
 -0.03547165121631224)

In [9]:
#Training
#lr is the step size applied to every numerical gradient
lr = 1e-1
for epoch in range(20):
    #Update the second layer first, then the first layer. Within a layer each
    #neuron is stepped once per sample over the whole data set before moving
    #on to the next neuron.
    for layer in (layer_2, layer_1):
        for n in layer.ns:
            for j in range(N):
                grad_w, grad_b = get_gradient(n, x[j], y[j])
                n.w -= grad_w * lr
                n.b -= grad_b * lr

    #Report the summed loss after every epoch
    print(epoch, total_loss())

0 71.27809979054456
1 55.53369600835406
2 47.64408605359174
3 39.200255958378676
4 28.59178316767759
5 19.637595883921538
6 14.464849222522808
7 11.336167390592282
8 9.27956962809844
9 7.834074941092935
10 6.7625328504537086
11 5.935215592660501
12 5.275599989003669
13 4.735778016691303
14 4.284561791902418
15 3.9010267865375927
16 3.570711092743413
17 3.283300228648476
18 3.031197471849734
19 2.808631234254595


In [10]:
#Evaluation
#Accuracy is measured on the same x, y that were used for training
correct = 0
for i in range(N):
    layer_1.run(x[i])
    layer_2.run(layer_1.out)
    #A prediction counts as correct when the strongest output matches the one-hot label
    correct += int(np.argmax(layer_2.out) == np.argmax(y[i]))

correct / N

0.9895833333333334