# 导入需要的库

In [25]:
import numpy as np
import functions as f
from gradient import numerical_gradient
from collections import OrderedDict
from layers import *
import pickle
import struct
import matplotlib.pyplot as plt

# 定义读取数据集的函数

In [26]:

def load_mnist_data(kind):
    '''
    加载数据集
    :param kind: 加载训练数据还是测试数据
    :return: 打平之后的数据和one hot编码的标签
    '''
    labels_path = '../data/%s-labels-idx1-ubyte' % kind
    images_path = '../data/%s-images-idx3-ubyte' % kind
    with open(labels_path, 'rb') as lbpath:
        struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(images_path, 'rb') as imgpath:
        struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)

    return images / 255., np.eye(10)[labels]

# 定义神经网络架构

In [32]:

class NLayerMLP:

    def __init__(self, sizes, weight_init_std = 0):
        # 存储权重值和偏置值
        self.sizes = sizes
        self.weights = []
        self.biases = []
        for input_size, output_size in zip(sizes[0:-1], sizes[1:]):
            # 初始化权重
            weight = weight_init_std * np.random.randn(input_size, output_size)
            bias = np.zeros(output_size)
            self.weights.append(weight)
            self.biases.append(bias)
        # 这个神经网络的层数为n-1
        size = len(sizes)-1
        # 生成层
        self.layers = OrderedDict()
        for i in range(0, size):
            if i != size-1:
                self.layers[f'Affine{i}'] = Affine(self.weights[i], self.biases[i])
                #self.layers['Relu1'] = Relu()
                self.layers[f'Sigmoid1{i}'] = Sigmoid()
            else:
                # 这里没有激活函数
                self.layers[f'Affine{i}'] = Affine(self.weights[i], self.biases[i])
        self.lastLayer = SoftmaxWithLoss()
        #self.lastLayer = IdentityWithLoss()
        self.params = dict()
        self.params['sizes'] = self.sizes
        self.params['biases'] = self.biases
        self.params['weights'] = self.weights
        self.params['layers'] = self.layers

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    # x:输入数据, t:监督数据
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x:输入数据, t:监督数据
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        for i in range(0, len(self.sizes)-1):
            grads[f'W{i}'] = numerical_gradient(loss_W, self.weights[i])
            grads[f'b{i}'] = numerical_gradient(loss_W, self.biases[i])

        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # 设定
        grads = {}
        for i in range(0, len(self.sizes)-1):
            grads[f'W{i}'], grads[f'b{i}'] = self.layers[f'Affine{i}'].dW, self.layers[f'Affine{i}'].db

        return grads



    def sgd(self, training_data, epochs, batch_size, lr, test_data=None, iter_num=10, save_path=None,
            loss_path=None):
        train_features, train_labels = training_data
        test_features, test_labels = test_data
        train_size = len(train_labels)
        test_size = len(test_labels)
        for i in range(epochs):
            for batch_index in range(0, train_size, batch_size):
                lower_range = batch_index
                upper_range = batch_index + batch_size
                if upper_range > train_size:
                    upper_range = train_size
                x_batch = train_features[lower_range: upper_range, :]
                y_batch = train_labels[lower_range: upper_range]
                # 计算梯度
                #grad = self.numerical_gradient(x_batch, y_batch)
                grad = self.gradient(x_batch, y_batch)
                # 更新参数
                for index in range(len(network.sizes)-1):
                    self.weights[index] -= lr * grad[f"W{index}"]
                    self.biases[index] -= lr * grad[f"b{index}"]

            loss = self.loss(x_batch, y_batch)
            loss_test = self.loss(test_images, test_labels)
            accuracy = np.sum(self.predict(test_features).argmax(1) == test_labels.argmax(1))/test_size
            #if i % iter_per_epoch == 0:
            if i % iter_num == 0:
                print(f"epoch: {i}, train loss, test loss , accuracy |{loss} , {loss_test}, {accuracy*100}%")
                if save_path:
                    self.save(save_path)
                    print("Weight has been saved !!")
                if loss_path:
                    with open(loss_path, "a") as f:
                        if i == 0:
                            f.write("epoch train_loss test_loss  accuracy\n")
                            f.write(f"{loss}  {loss_test} {accuracy}\n")
                        else:
                            f.write(f"{loss}  {loss_test} {accuracy}\n")


    def adam(self, training_data, epochs, batch_size, lr, test_data=None, iter_num=10, save_path=None,
             loss_path=None):
        train_features, train_labels = training_data
        test_features, test_labels = test_data
        train_size = len(train_labels)
        test_size = len(test_labels)
        for i in range(epochs):
            for batch_index in range(0, train_size, batch_size):
                lower_range = batch_index
                upper_range = batch_index + batch_size
                if upper_range > train_size:
                    upper_range = train_size
                x_batch = train_features[lower_range: upper_range, :]
                y_batch = train_labels[lower_range: upper_range]
                # 计算梯度
                #grad = self.numerical_gradient(x_batch, y_batch)
                grad = self.gradient(x_batch, y_batch)
                # 更新参数, 这里使用adam算法
                rho1, rho2 = 0.9, 0.999
                delta = 1e-8
                for index in range(len(network.sizes)-1):
                    weight = self.weights[index]
                    weight_grad = grad[f"W{index}"]
                    s = np.zeros_like(weight)
                    r = np.zeros_like(weight)
                    # update the first and the second moment
                    s = rho1 * s + (1 - rho1) * weight_grad
                    r = rho2 * r + (1 - rho2) * np.square(weight_grad)
                    # get the partial first and second moment
                    s_hat = s / (1 - rho1 ** (i + 1))
                    r_hat = r / (1 - rho1 ** (i + 1))
                    # use the partial first and second moment to modify the GD method
                    self.weights[index] -= lr * s_hat / (np.sqrt(r_hat) + delta)
                    bias = self.biases[index]
                    bias_grad = grad[f"b{index}"]
                    s = np.zeros_like(bias)
                    r = np.zeros_like(bias)
                    # update the first and the second moment
                    s = rho1 * s + (1 - rho1) * bias_grad
                    r = rho2 * r + (1 - rho2) * np.square(bias_grad)
                    # get the partial first and second moment
                    s_hat = s / (1 - rho1 ** (i + 1))
                    r_hat = r / (1 - rho1 ** (i + 1))
                    # use the partial first and second moment to modify the GD method
                    self.biases[index] -= lr * s_hat / (np.sqrt(r_hat) + delta)

            loss = self.loss(x_batch, y_batch)
            loss_test = self.loss(test_images, test_labels)
            accuracy = np.sum(self.predict(test_features).argmax(1) == test_labels.argmax(1))/test_size
            #if i % iter_per_epoch == 0:
            if i % iter_num == 0:
                print(f"epoch: {i}, train loss, test loss , accuracy |{loss} , {loss_test}, {accuracy*100}%")
                if save_path:
                    self.save(save_path)
                    print("Weight has been saved !!")
                if loss_path:
                    with open(loss_path, "a") as f:
                        if i == 0:
                            f.write("epoch train_loss test_loss  accuracy\n")
                            f.write(f"{loss}  {loss_test} {accuracy}\n")
                        else:
                            f.write(f"{loss}  {loss_test} {accuracy}\n")

    # 保存权重文件
    def save(self,path):
        with open(path,'wb') as f :
            self.params['sizes'] = self.sizes
            self.params['biases'] = self.biases
            self.params['weights'] = self.weights
            self.params['layers'] = self.layers
            pickle.dump(self.params, f)

    # 导入权重文件
    def load(self,path):
        with open(path,'rb') as f :
            self.params = pickle.load(f)
            self.sizes = self.params['sizes']
            self.biases = self.params['biases']
            self.weights = self.params['weights']
            self.layers = self.params['layers']


        #print(weight)



# 导入训练集和测试集

In [4]:
train_images, train_labels = load_mnist_data(kind='train')
train_images = np.reshape(train_images, newshape=(-1, 1, 784))
test_images, test_labels = load_mnist_data('t10k')
test_images = np.reshape(test_images, newshape=(-1, 1, 784))

In [5]:
training_set_size = len(train_images)
testing_set_size = len(test_images)

In [39]:
## check gradient

network = NLayerMLP(sizes=[784, 32, 2, 10])
mini_batch_size = 100

grad_numerical = network.numerical_gradient(train_images[:mini_batch_size], train_labels[:mini_batch_size])
grad_backprop = network.gradient(train_images[:mini_batch_size], train_labels[:mini_batch_size])


for key in grad_numerical.keys():
    diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) )
    print(key + ":" + str(diff))

W0:8.575984678652584e-13
b0:1.3007690419456141e-12
W1:7.20684311049466e-11
b1:1.412010317697332e-10
W2:1.1215424440975389e-08
b2:2.1963221961225284e-08


In [45]:
# 定义神经网络架构
network = NLayerMLP(sizes=[784, 64, 32, 10])
weight_save_path  = "weight_multi_layer_adam.pickle"
epoches = 100000  # 适当设定循环的次数
train_size = train_images.shape[0]
batch_size = 128
learning_rate = 1e-4
train_loss_list = []
test_loss_list = []
iter_num = 10
load_weight = True
train_net = True
if load_weight:
    network.load(weight_save_path)
if True:
    accuracy = np.sum(network.predict(test_images).argmax(1) == test_labels.argmax(1))/testing_set_size
    print(f"epoch: , train loss, test loss , accuracy |, {accuracy*100}%")
    print("Weight has been saved !!")
network.adam(training_data=(train_images, train_labels), epochs=epoches, batch_size=batch_size,
            lr=learning_rate, test_data=(test_images, test_labels), iter_num=10,
            save_path=weight_save_path)

epoch: , train loss, test loss , accuracy |, 95.84%
Weight has been saved !!
epoch: 0, train loss, test loss , accuracy |0.21743682088681396 , 0.17378501691924672, 95.75%
Weight has been saved !!
epoch: 10, train loss, test loss , accuracy |0.2110085962507403 , 0.18207387568266728, 95.69%
Weight has been saved !!


KeyboardInterrupt: 

# 验证权重和偏置都为0时

In [52]:
# 定义神经网络架构
network = NLayerMLP(sizes=[784, 128, 10])
weight_save_path  = "weight_64_hidden.pickle"
epoches = 10000  # 适当设定循环的次数
train_size = train_images.shape[0]
batch_size = 128
learning_rate = 1e-2
train_loss_list = []
test_loss_list = []
iter_num = 1
load_weight = False
train_net = True
if load_weight:
    network.load(weight_save_path)
if True:
    accuracy = np.sum(network.predict(test_images).argmax(1) == test_labels.argmax(1))/testing_set_size
    print(f"epoch: , train loss, test loss , accuracy |, {accuracy*100}%")
    print("Weight has been saved !!")
network.sgd(training_data=(train_images, train_labels), epochs=epoches, batch_size=batch_size,
             lr=learning_rate, test_data=(test_images, test_labels), iter_num=iter_num,
             save_path=weight_save_path, loss_path="single_layer_numpy_sgd.txt")

epoch: 661, train loss, test loss , accuracy |1.6799289539170992 , 1.5306884115579271, 38.75%
Weight has been saved !!
epoch: 662, train loss, test loss , accuracy |1.6799115628929446 , 1.53066593287219, 38.75%
Weight has been saved !!
epoch: 663, train loss, test loss , accuracy |1.6798941480787288 , 1.530643524834076, 38.75%
Weight has been saved !!


KeyboardInterrupt: 

In [49]:
# 定义神经网络架构
network = NLayerMLP(sizes=[784, 32,4, 10])
weight_save_path  = "weight_64_hidden.pickle"
epoches = 100000  # 适当设定循环的次数
train_size = train_images.shape[0]
batch_size = 128
learning_rate = 1e-4
train_loss_list = []
test_loss_list = []
iter_num = 1
load_weight = True
train_net = True
if load_weight:
    network.load(weight_save_path)
if True:
    accuracy = np.sum(network.predict(test_images).argmax(1) == test_labels.argmax(1))/testing_set_size
    print(f"epoch: , train loss, test loss , accuracy |, {accuracy*100}%")
    print("Weight has been saved !!")
network.sgd(training_data=(train_images, train_labels), epochs=epoches, batch_size=batch_size,
            lr=learning_rate, test_data=(test_images, test_labels), iter_num=iter_num,
            save_path=weight_save_path, loss_path="single_layer_numpy_adam4.txt")

epoch: , train loss, test loss , accuracy |, 20.75%
Weight has been saved !!
epoch: 0, train loss, test loss , accuracy |2.1283100007268407 , 2.076106760557496, 20.75%
Weight has been saved !!
epoch: 1, train loss, test loss , accuracy |2.1282113165164653 , 2.076112322932562, 20.75%
Weight has been saved !!
epoch: 2, train loss, test loss , accuracy |2.1281253676766254 , 2.0761171416420927, 20.74%
Weight has been saved !!
epoch: 3, train loss, test loss , accuracy |2.1280514256212837 , 2.0761211863424385, 20.74%
Weight has been saved !!
epoch: 4, train loss, test loss , accuracy |2.127988800356441 , 2.0761244284869242, 20.74%
Weight has been saved !!
epoch: 5, train loss, test loss , accuracy |2.1279368388675697 , 2.076126840473701, 20.73%
Weight has been saved !!
epoch: 6, train loss, test loss , accuracy |2.1278949235170113 , 2.07612839490973, 20.73%
Weight has been saved !!
epoch: 7, train loss, test loss , accuracy |2.1278624704614657 , 2.0761290640082795, 20.73%
Weight has been sa

KeyboardInterrupt: 