In [2]:
import os
import tensorflow as tf
import matplotlib
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets
from matplotlib import pyplot as plt


# Data loading and preprocessing
# 1. Load the MNIST training and validation splits.
# 2. Scale the pixel intensities (x) into [0, 1] and one-hot encode the digit
#    labels (y) so that the class ids carry no ordinal meaning.
# 3. tf.data.Dataset.from_tensor_slices((x, y)) slices both tensors along their
#    first axis, so each dataset element is one ((28, 28) image, (10,) label) pair.
# 4. Dataset.batch(n) then groups n consecutive elements into one mini-batch.
BATCH_SIZE = 600   # 60000 training images -> 100 steps per epoch
NUM_CLASSES = 10   # digits 0-9

# `datasets` is the tensorflow.keras.datasets module imported at the top of this cell.
(x, y), (x_val, y_val) = datasets.mnist.load_data()
x = tf.cast(x, tf.float32) / 255.0                       # pixel values scaled to [0, 1]
y = tf.one_hot(tf.cast(y, tf.int32), depth=NUM_CLASSES)  # integer labels -> one-hot rows
print(x.shape, y.shape)

# The first axis of (x, y) is the sample axis; batching below operates on it.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
print(train_dataset)

# The printed batched shape shows None for the leading (batch) axis.
# Chaining .repeat(20) here would be an alternative to an explicit
# `for epoch in range(20)` loop around the dataset; it is intentionally not used.
train_dataset = train_dataset.batch(BATCH_SIZE)
print(train_dataset)

(60000, 28, 28) (60000, 10)
<TensorSliceDataset shapes: ((28, 28), (10,)), types: (tf.float32, tf.float32)>
<BatchDataset shapes: ((None, 28, 28), (None, 10)), types: (tf.float32, tf.float32)>


In [16]:
# Network definition
# 1. A Sequential container stacks three fully connected layers.
# 2. Layers that feed a following layer use an activation function (ReLU here).
# 3. The output layer feeds nothing further, so it has no activation.
LEARNING_RATE = 0.1

model = keras.Sequential()
model.add(layers.Dense(256, activation='relu'))  # hidden layer 1
model.add(layers.Dense(512, activation='relu'))  # hidden layer 2
model.add(layers.Dense(10))                      # output layer: 10 units, no activation

# Optimizer: plain stochastic gradient descent
optimizer = optimizers.SGD(learning_rate=LEARNING_RATE)
print(train_dataset)

<BatchDataset shapes: ((None, 28, 28), (None, 10)), types: (tf.float32, tf.float32)>


In [9]:
# Model training
#
# Mini-batch SGD on a squared-error loss between the network output and the
# one-hot labels. Uses `model`, `optimizer` and `train_dataset` defined in the
# earlier cells.
NUM_EPOCHS = 30  # number of full passes over train_dataset
LOG_EVERY = 20   # print the loss every LOG_EVERY steps

losses = []  # loss of the last mini-batch of each epoch
for epoch in range(NUM_EPOCHS):
    # enumerate() pairs a running step index with each (images, labels) batch.
    # The batch variables are deliberately NOT called x / y so the full-dataset
    # tensors created in the data-loading cell are not overwritten.
    for step, (x_batch, y_batch) in enumerate(train_dataset):
        # Flatten each 28x28 image into a 784-vector for the Dense layers.
        x_flat = tf.reshape(x_batch, (-1, 28 * 28))

        # Only the forward pass and the loss need to be recorded by the tape.
        with tf.GradientTape() as tape:
            out = model(x_flat)  # step 1: forward pass, shape [b, 10]
            # Step 2: squared error summed over the batch, divided by batch size.
            loss = tf.reduce_sum(tf.square(out - y_batch)) / x_flat.shape[0]

        # Step 3: compute gradients and update [w1, b1, w2, b2, w3, b3].
        # Done outside the `with` block so the tape does not also record the
        # backward pass and the parameter update.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))  # w' = w - lr * grad

        if step % LOG_EVERY == 0:
            print(epoch, step, 'loss:', loss.numpy())
    losses.append(float(loss))

# Save the whole model
model.save('mnist_model.h5')
# Save the weights only
model.save_weights('mnist_model_weight.h5')
# TODO: save a checkpoint

0 0 loss: 1.8820798
0 20 loss: 0.6128091
0 40 loss: 0.44580832
0 60 loss: 0.41272196
0 80 loss: 0.30545777
1 0 loss: 0.29905117
1 20 loss: 0.3122517
1 40 loss: 0.26045063
1 60 loss: 0.25969476
1 80 loss: 0.21582326
2 0 loss: 0.20495807
2 20 loss: 0.2327683
2 40 loss: 0.21076581
2 60 loss: 0.21348256
2 80 loss: 0.18165094
3 0 loss: 0.17308609
3 20 loss: 0.19999586
3 40 loss: 0.18444395
3 60 loss: 0.18904938
3 80 loss: 0.16246955
4 0 loss: 0.1547785
4 20 loss: 0.18063948
4 40 loss: 0.16685429
4 60 loss: 0.17246328
4 80 loss: 0.14926074
5 0 loss: 0.14172319
5 20 loss: 0.16635369
5 40 loss: 0.15403236
5 60 loss: 0.16019313
5 80 loss: 0.13918146
6 0 loss: 0.1321174
6 20 loss: 0.15535897
6 40 loss: 0.14401066
6 60 loss: 0.15048075
6 80 loss: 0.13082191
7 0 loss: 0.12446917
7 20 loss: 0.14652878
7 40 loss: 0.13570285
7 60 loss: 0.14250867
7 80 loss: 0.1239799
8 0 loss: 0.11822276
8 20 loss: 0.13899048
8 40 loss: 0.12857303
8 60 loss: 0.1356633
8 80 loss: 0.11819247
9 0 loss: 0.11285395
9 20 l

In [11]:
# Print the model's layers with their parameter counts
# NOTE(review): the recorded output of this cell also contains an optimizer
# config dict, which this code does not produce — presumably a line such as
# `optimizer.get_config()` was removed after the cell was run; confirm.
model.summary()  # print the model information


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              multiple                  200960    
_________________________________________________________________
dense_4 (Dense)              multiple                  131584    
_________________________________________________________________
dense_5 (Dense)              multiple                  5130      
Total params: 337,674
Trainable params: 337,674
Non-trainable params: 0
_________________________________________________________________


{'name': 'SGD',
 'learning_rate': 0.1,
 'decay': 0.0,
 'momentum': 0.0,
 'nesterov': False}

In [7]:
model.get_weights() # display the model's weights (a list of arrays)

[array([[-0.03869753, -0.00843564, -0.03202304, ..., -0.02361105,
         -0.03781034,  0.04538522],
        [ 0.02173847, -0.05168454,  0.02655206, ...,  0.05159335,
         -0.03136662,  0.01947975],
        [-0.04007988, -0.06495117, -0.05982079, ...,  0.04057609,
         -0.01051266,  0.0242835 ],
        ...,
        [-0.06600653,  0.03256996,  0.05081888, ...,  0.06586224,
         -0.00046503,  0.03464179],
        [-0.06100262,  0.00493716, -0.07507886, ..., -0.04582621,
         -0.04807085, -0.00524362],
        [-0.01904261, -0.01534906, -0.01969153, ..., -0.00740599,
          0.07291673, -0.0192333 ]], dtype=float32),
 array([-2.27938732e-03,  9.20504481e-02,  5.02940826e-02, -2.56354082e-02,
         3.59268822e-02,  6.73896074e-02,  5.18304529e-03,  3.67791653e-02,
        -8.28697160e-03,  7.80597255e-02, -1.77499112e-02,  1.93625670e-02,
         6.45609386e-03,  2.84080133e-02, -5.90319373e-03,  1.14830494e-01,
        -3.76271866e-02, -1.05293654e-02,  6.61319867e

In [11]:
model.get_layer(index=1) # return the layer object at the given index in the network

<tensorflow.python.keras.layers.core.Dense at 0x10e50d7d0>