In [1]:
import numpy as np

In [2]:
class SigmodActivator:
    def forward(self, weight_input):
        return 1.0 / (1.0 + np.exp(-weight_input))
    def backward(self, output):
        return output * (1 - output)

In [104]:
class FullConnectedLayer:
    def __init__(self, input_dim, output_dim, activator):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.activator = activator
        self.weight = np.random.uniform(-0.1, 0.1, (self.output_dim, self.input_dim)) #shape=(output_dim, input_dim)
        self.bias = np.zeros(self.output_dim) #shape=(output_dim,)
    def forward(self, input_array):
        self.input = input_array
        self.output = self.activator.forward(np.dot(self.weight, input_array) + self.bias) #shape=(output_dim,)
    def backward(self, delta_array):
        self.delta = self.activator.backward(self.input) * np.dot(self.weight.T, delta_array)
        self.weight_grad = np.matmul(np.reshape(delta_array, (-1,1)), np.reshape(self.input, (1,-1))) #np.dot(delta_array, self.input.T)
        self.bias_grad = delta_array
    def update(self, learning_rate):
        self.weight += learning_rate * self.weight_grad
        self.bias += learning_rate * self.bias_grad

In [106]:
class Network:
    def __init__(self, layer_nodes):
        self.layers = []
        for i in range(len(layer_nodes) - 1):
            self.layers.append(FullConnectedLayer(layer_nodes[i], layer_nodes[i+1], SigmodActivator()))
    def predict(self, sample):
        output = sample
        for layer in self.layers:
            layer.forward(output)
            output = layer.output
            #print(output.shape)
        return output
    def calc_gradient(self, label):
        #print(self.layers[-1].output.shape)
        #print(label.shape)
        delta = self.layers[-1].activator.backward(self.layers[-1].output) * (label - self.layers[-1].output)
        for layer in self.layers[::-1]:
            layer.backward(delta)
            delta = layer.delta
        return delta
    def update(self, learning_rate):
        for layer in self.layers:
            layer.update(learning_rate)
    def train_one_sample(self, label, sample, learning_rate):
        self.predict(sample)
        self.calc_gradient(label)
        self.update(learning_rate)
    def train(self, labels, data_set, learning_rate, epochs):
        for i in range(epochs):
            for label, sample in zip(labels, data_set):
                self.train_one_sample(label, sample, learning_rate)

In [71]:
import struct
class Loader(object):
    def __init__(self, path, count):
        self.path = path
        self.count = count
    def get_file_content(self):
        fp = open(self.path, "rb")
        cont = fp.read()
        fp.close()
        return cont
    def to_int(self, byte):
        return byte #struct.unpack("B", byte)[0]
class ImageLoader(Loader):
    def get_picture(self, content, index):
        start = index * 28 * 28 + 16
        picture = []
        for i in range(28):
            picture.append([])
            for j in range(28):
                picture[i].append(self.to_int(content[start + i * 28 + j]))
        return picture
    def get_one_sample(self, picture):
        sample = []
        for i in range(28):
            for j in range(28):
                sample.append(picture[i][j])
        return sample
    def load(self):
        cont = self.get_file_content()
        data_set = []
        for index in range(self.count):
            data_set.append(self.get_one_sample(self.get_picture(cont, index)))
        return data_set
class LabelLoader(Loader):
    def load(self):
        cont = self.get_file_content()
        labels = []
        for index in range(self.count):
            labels.append(self.norm(cont[index + 8]))
        return labels
    def norm(self, label):
        label_vec = []
        label_value = self.to_int(label)
        for i in range(10):
            if i == label_value:
                label_vec.append(0.9)
            else:
                label_vec.append(0.1)
        return label_vec

In [108]:
def get_training_data_set():
    image_loader = ImageLoader("./data/train-images-idx3-ubyte", 60000)
    label_loader = LabelLoader("./data/train-labels-idx1-ubyte", 60000)
    return image_loader.load(), label_loader.load()
def get_test_data_set():
    image_loader = ImageLoader("./data/t10k-images-idx3-ubyte", 6000)
    label_loader = LabelLoader("./data/t10k-labels-idx1-ubyte", 6000)
    return image_loader.load(), label_loader.load()

In [109]:
train_features, train_labels = get_training_data_set()
print("train_features[0]={},train_labels[0]={}".format(train_features[0], train_labels[0]))
train_features, train_labels = np.array(train_features), np.array(train_labels)
print(train_features.shape)
print(train_labels.shape)

train_features[0]=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18, 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1,

In [72]:
def get_result(prob):
    return np.argmax(prob)
def evaluation(network, test_data_set, test_labels):
    error = 0
    total = len(test_data_set)
    for i in range(total):
        label = get_result(test_labels[i])
        pred = get_result(network.predict(test_data_set[i]))
        if label != pred:
            error += 1
    return float(error) / float(total)

In [110]:
from datetime import datetime
def train_and_evalutation():
    last_error_retio = 1.0
    epochs = 0
    train_features, train_labels = get_training_data_set()
    test_features, test_labels = get_test_data_set()
    train_features, train_labels = np.array(train_features), np.array(train_labels)
    test_features, test_labels = np.array(test_features), np.array(test_labels)
    network = Network([784, 300, 10])
    while True:
        epochs += 1
        network.train(train_labels, train_features, 0.01, 1)
        print("{} epoch {} finished".format(datetime.now(), epochs))
        if epochs % 10 == 0:
            error_ratio = evaluation(network, test_features, test_labels)
            print("{} after epoch {}, test error ratio is {}".format(datetime.now(), epochs, error_ratio))
            if error_ratio > last_error_retio:
                break
            else:
                last_error_retio = error_ratio
train_and_evalutation()

2020-05-25 15:53:59.813661 epoch 1 finished
2020-05-25 15:55:10.096337 epoch 2 finished
2020-05-25 15:56:17.246352 epoch 3 finished
2020-05-25 15:57:19.933103 epoch 4 finished
2020-05-25 15:58:22.502882 epoch 5 finished
2020-05-25 15:59:24.936211 epoch 6 finished
2020-05-25 16:00:27.326480 epoch 7 finished
2020-05-25 16:01:29.772617 epoch 8 finished
2020-05-25 16:02:32.023038 epoch 9 finished
2020-05-25 16:03:34.676148 epoch 10 finished
2020-05-25 16:03:34.956161 after epoch 10, test error ratio is 0.8381666666666666
2020-05-25 16:04:37.702888 epoch 11 finished
2020-05-25 16:05:46.871256 epoch 12 finished
2020-05-25 16:06:51.282101 epoch 13 finished
2020-05-25 16:07:52.774257 epoch 14 finished
2020-05-25 16:09:01.023804 epoch 15 finished
2020-05-25 16:10:11.733789 epoch 16 finished
2020-05-25 16:11:16.927873 epoch 17 finished
2020-05-25 16:12:20.898848 epoch 18 finished
2020-05-25 16:13:24.988829 epoch 19 finished
2020-05-25 16:14:29.695305 epoch 20 finished
2020-05-25 16:14:29.975170 