## 数据准备

首先，我们需要准备训练数据，我们有以下选择：

1. 使用sklearn中的手写数字（8*8）；
2. 使用 [tfds](https://github.com/tensorflow/datasets/tree/master) 中的手写数字(28*28) [mnist](https://www.tensorflow.org/datasets/catalog/mnist)。

sklearn 中的手写数字分辨率较低（8*8），人工识别可能都有困难；因此我们使用 tfds 中的 mnist 数据集。

另外，tfds 使用手册可见 [TensorFlow Datasets](https://www.tensorflow.org/datasets/overview?hl=zh-cn)。

In [None]:
from sklearn import datasets
from matplotlib import pyplot

# Handwritten-digit dataset bundled with scikit-learn
digits = datasets.load_digits()

# Switch the default colormap to grayscale, then draw sample 8 as a matrix image
pyplot.gray()
pyplot.matshow(digits.images[8])
# Alternative sample to inspect:
# pyplot.matshow(digits.images[16])
pyplot.show()

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from matplotlib import pyplot as plt

# Handwritten-digit dataset (MNIST) from TensorFlow Datasets.
# with_info=True makes tfds.load also return the dataset metadata object.
mnist, info = tfds.load("mnist", split="train", shuffle_files=True, with_info=True)

# Option 1 (disabled): render a few samples with pyplot
# plt.gray()
# for example in tfds.as_numpy(mnist.take(3)):
#     print(example.keys())
#     plt.matshow(example["image"])

# Option 2 (disabled): render a few samples as a pandas.DataFrame
# tfds.as_dataframe(mnist.take(5), info)

# Option 3 (active): render two samples with tfds.show_examples
tfds.show_examples(mnist.take(2), info)

## 通过深度学习实现手写数字识别

### 通过keras API实现关于手写数字识别的深度学习

主要步骤是：

1. 通过 `keras.Sequential` 和 `keras.layers.Dense` 创建模型。需要指定模型的层数、每层的节点数以及每层的激活函数。
2. 为模型指定损失函数、优化器和指标。损失函数用于衡量模型预测值偏离实际值的程度，在训练过程中我们的目标是将损失最小化；优化器决定如何基于损失函数对神经网络进行更新；指标是衡量成功的标准，在训练和验证过程中需要对其进行监控，与损失不同，训练过程不会直接对指标进行优化。
3. 利用训练数据对模型进行训练。

In [None]:
import numpy as np
# import tensorflow as tf
import keras
from matplotlib import pyplot as plt

# Load the MNIST training and test splits shipped with Keras.
(orig_train_images, train_labels), (orig_test_images, test_labels) = keras.datasets.mnist.load_data()
assert orig_train_images.shape == (60000, 28, 28)
assert train_labels.shape == (60000,)
assert orig_test_images.shape == (10000, 28, 28)
assert test_labels.shape == (10000,)

# Flatten every 28x28 image into a 784-element vector and divide by 255 so
# the inputs are float32 values instead of raw integer pixel intensities.
train_images = orig_train_images.reshape(60000, 28*28).astype("float32") / 255
test_images = orig_test_images.reshape(10000, 28*28).astype("float32") / 255

# Build the network: one hidden ReLU layer followed by a 10-way softmax.
model = keras.Sequential([
    keras.layers.Dense(512, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])

# Compile, i.e. attach the optimizer, the loss function and the metric to monitor.
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# Train. Only the first 10000 training samples are used here
# (presumably to keep the demo fast -- widen the slice to train on everything).
model.fit(train_images[0:10000], train_labels[0:10000], epochs=5, batch_size=128)

# Predict on a random window of 10 consecutive test images.
start_idx = np.random.randint(0, 10000-10)
predictions_as_prop = model.predict(test_images[start_idx:start_idx+10])
# Each row holds 10 class probabilities; the predicted digit is the index of the largest one.
predictions = np.argmax(predictions_as_prop, axis=1)

# Show the 10 selected images on a 2x5 grid (axes.flat walks the grid row by row).
plt.gray()
fig, axes = plt.subplots(2, 5)
fig.set_size_inches(5*2.5, 2*2.5)
for ax, image in zip(axes.flat, orig_test_images[start_idx:start_idx+10]):
    ax.matshow(image)
plt.show()

# Print predicted and true labels on aligned rows for easy comparison.
print("%+12s:"%"predictions", predictions)
print("%+12s:"%"real", test_labels[start_idx:start_idx+10])

### 基于TensorFlow从头开始实现深度学习

In [None]:
from typing import Any
import math
import numpy as np
import tensorflow as tf
import keras
from matplotlib import pyplot as plt

# A minimal dense (fully connected) layer.
class SimpleDense:
    """Dense layer computing ``activation(matmul(inputs, W) + b)``.

    The weight matrix ``W`` and bias vector ``b`` are created lazily on the
    first call, once the size of the last input axis is known.

    Args:
        units: Number of output features of the layer.
        activation: Callable applied to the affine output.
    """

    def __init__(self, units, activation) -> None:
        self.activation = activation
        self.units = units
        self.built = False

    def __repr__(self) -> str:
        # W and b only exist after build(); describe the layer without them
        # instead of failing with AttributeError.
        if not hasattr(self, "W"):
            return "SimpleDense(units=%s, unbuilt)" % (self.units,)
        return "weights:%s\nbias:%s" % (self.W.__repr__(), self.b.__repr__())

    def __call__(self, inputs):
        """Apply the layer to ``inputs``, creating the weights on first use."""
        if not self.built:
            self.build(inputs.shape[-1])
            # Flip the flag only after build() succeeded, so a failed build
            # does not leave the layer marked as built without weights.
            self.built = True
        return self.activation(tf.matmul(inputs, self.W) + self.b)

    def build(self, input_size):
        """Create ``W`` with shape (input_size, units) and ``b`` with shape (units,)."""
        output_size = self.units

        # Initialize the weights with random values drawn uniformly from [0, 0.1).
        self.W = tf.Variable(tf.random.uniform((input_size, output_size), minval=0., maxval=0.1))

        # Initialize the bias with zeros.
        self.b = tf.Variable(tf.zeros((output_size,)))

    @property
    def weights(self):
        """Return the trainable variables as ``[W, b]`` (available after the first call)."""
        return [self.W, self.b]

# Model object: a plain stack of layers.
class SimpleSequential:
    """Chain layers so that the output of each one feeds the next.

    Args:
        layers: Non-empty sequence of callables, each exposing a ``weights``
            attribute that lists its variables.
    """

    def __init__(self, layers) -> None:
        assert len(layers) > 0
        self.layers = layers

    def __call__(self, inputs):
        """Run ``inputs`` through every layer in order and return the result."""
        output = inputs
        for apply_layer in self.layers:
            output = apply_layer(output)
        return output

    @property
    def weights(self):
        """Return the variables of all layers as one flat list, in layer order."""
        return [
            variable
            for member in self.layers
            for variable in member.weights
        ]

# Weight update.
# A hand-written gradient-descent step would look like this:
# learning_rate = 0.01
# def update_weights(gradients, weights):
#     for g, w in zip(gradients, weights):
#         w.assign_sub(g*learning_rate)

# The active implementation delegates the update to a Keras SGD optimizer.
optimizer = keras.optimizers.SGD(learning_rate=0.01)
def update_weights(gradients, weights):
    """Apply ``gradients`` to the matching ``weights`` through the shared optimizer.

    Args:
        gradients: One gradient per variable, in the same order as ``weights``.
        weights: The variables to update.
    """
    grads_and_vars = zip(gradients, weights)
    optimizer.apply_gradients(grads_and_vars)

# A single training step.
def one_traning_step(model: SimpleSequential, images_batch, labels_batch):
    """Run one forward/backward pass on a batch and update the model weights.

    Args:
        model: The model to train.
        images_batch: Input samples of the batch.
        labels_batch: Integer class labels matching ``images_batch``.

    Returns:
        The mean sparse categorical cross-entropy over the batch, computed
        before the weights are updated.
    """
    # Record the forward pass so the loss can be differentiated afterwards.
    with tf.GradientTape() as tape:
        outputs = model(images_batch)
        average_loss = tf.reduce_mean(
            keras.losses.sparse_categorical_crossentropy(labels_batch, outputs)
        )
    trainable = model.weights
    update_weights(tape.gradient(average_loss, trainable), trainable)
    return average_loss

# Mini-batch generator.
class BatchGenerator:
    """Serve consecutive fixed-size slices of paired images and labels.

    Args:
        images: Sliceable collection of samples.
        labels: Sliceable collection of labels, same length as ``images``.
        batch_size: Number of samples per batch; the last batch may be shorter.
    """

    def __init__(self, images, labels, batch_size) -> None:
        assert len(images) == len(labels)
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        # Round up so a trailing partial batch is counted as well.
        self.num_batches = math.ceil(len(images) / batch_size)
        self.cur_index = 0

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the cursor."""
        window = slice(self.cur_index, self.cur_index + self.batch_size)
        self.cur_index = window.stop
        return self.images[window], self.labels[window]

    def reset(self):
        """Move the cursor back to the first sample."""
        self.cur_index = 0

# The complete training loop.
def fit(model: SimpleSequential, images, labels, epoches, batch_size=128):
    """Train ``model`` for ``epoches`` passes over the data, batch by batch.

    Prints the epoch number at the start of every epoch and the batch loss
    for every 100th batch.

    Args:
        model: The model to train.
        images: Training samples.
        labels: Labels matching ``images``.
        epoches: Number of passes over the data.
        batch_size: Number of samples per training step.
    """
    for epoch_index in range(epoches):
        print(f"Epoch {epoch_index+1}")
        # A fresh generator per epoch restarts from the first sample.
        batches = BatchGenerator(images, labels, batch_size)
        for step in range(batches.num_batches):
            batch = batches.next()
            loss = one_traning_step(model, *batch)
            if step % 100:
                continue
            print(f"loss at batch {step}: {loss}")

# Compute the model accuracy.
def accuracy(model:SimpleSequential, images, labels, batch_size=128) -> float:
    """Return the fraction of ``images`` that ``model`` classifies correctly.

    The data is evaluated batch by batch and the correct predictions of all
    batches are accumulated, so the result covers the whole data set and not
    just the first batch.

    Args:
        model: The model to evaluate; its output rows are per-class scores.
        images: Samples to classify.
        labels: Integer class labels matching ``images``.
        batch_size: Number of samples evaluated per forward pass.

    Returns:
        The accuracy as a float in [0, 1]; 0.0 when ``images`` is empty.
    """
    total = len(images)
    if total == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    batch_generator = BatchGenerator(images, labels, batch_size)
    correct = 0
    for _ in range(batch_generator.num_batches):
        images_batch, labels_batch = batch_generator.next()
        # The predicted class is the index of the largest score in each row.
        predictions = np.argmax(model(images_batch), axis=1)
        correct += int(np.sum(predictions == labels_batch))
    return correct / total

# Load the MNIST training and test splits shipped with Keras.
(orig_train_images, train_labels), (orig_test_images, test_labels) = keras.datasets.mnist.load_data()
assert orig_train_images.shape == (60000, 28, 28)
assert train_labels.shape == (60000,)
assert orig_test_images.shape == (10000, 28, 28)
assert test_labels.shape == (10000,)

# Flatten every 28x28 image into a 784-element vector and divide by 255 so
# the inputs are float32 values instead of raw integer pixel intensities.
train_images = orig_train_images.reshape(60000, 28*28).astype("float32") / 255
test_images = orig_test_images.reshape(10000, 28*28).astype("float32") / 255

# Build the model from the hand-written layers.
model = SimpleSequential([
    SimpleDense(512, keras.activations.relu),
    SimpleDense(10, keras.activations.softmax)
])

# Train the model: 5 epochs, 128 samples per batch.
fit(model, train_images, train_labels, 5, 128)

# Report the accuracy on the test set as a percentage ("%%" prints a literal percent sign).
print("accuracy on test images: %.2f%%" % (accuracy(model, test_images, test_labels)*100))

# Predict on a random window of 10 consecutive test images.
start_idx = np.random.randint(0, 10000-10)
predictions_as_arr = model(test_images[start_idx:start_idx+10])
# Each row holds 10 class scores; the predicted digit is the index of the largest one.
predictions = np.argmax(predictions_as_arr, axis=1)

# Show the 10 selected images on a 2x5 grid (axes.flat walks the grid row by row).
plt.gray()
fig, axes = plt.subplots(2, 5)
fig.set_size_inches(5*2.5, 2*2.5)
for ax, image in zip(axes.flat, orig_test_images[start_idx:start_idx+10]):
    ax.matshow(image)
plt.show()

# Print predicted and true labels on aligned rows for easy comparison.
print("%+12s:"%"predictions", np.array(predictions))
print("%+12s:"%"real", test_labels[start_idx:start_idx+10])