In [1]:
import os
import tensorflow as tf
import matplotlib
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets
from matplotlib import pyplot as plt

# 数据读取和预处理
# 1. 读取训练集、测试集数据
# 2. 对数据进行预处理。X数据（像素灰度）进行归一化处理，一般处理到[0, 1]或[-1, 1]之间
#    对Y数据（分类标签）进行one-hot编码，去掉数字标签可能带来的大小关系
# 3. 使用tf.data.Dataset.from_tensor_slice((x, y)) 将三维的图片数据，按照第一个维度进行展开，及进行“打平”操作
# 4. 使用tf.data.Dataset.from_tensor_slice.batch 方法，设置批处理数据的大小
(x, y), (x_val, y_val) = tf.keras.datasets.mnist.load_data() #  如果没有from ... import ... 语句，需要一层层引用
x = tf.convert_to_tensor(x, dtype=tf.float32)/255. # 转换数据范围到[0,1]
y = tf.convert_to_tensor(y, dtype=tf.int32)
y = tf.one_hot(y, depth=10) # one-hot 编码，去掉标签的大小关系
print(x.shape, y.shape)
# tf.data.Dataset.from_tensor_slices真正作用是切分传入Tensor的第一个维度，生成相应的dataset，即第一维表明数据集中数据的数量，之后切分batch等操作都以第一维为基础。
# 打平后的数据维度为((28, 28)图片, (10, )标记)
train_dataset = tf.data.Dataset.from_tensor_slices((x, y)) 
train_dataset = train_dataset.batch(500) # 暂：转换完的维度(None, 28, 28)中None
print(train_dataset)

(60000, 28, 28) (60000, 10)
<BatchDataset shapes: ((None, 28, 28), (None, 10)), types: (tf.float32, tf.float32)>


In [3]:
# 网络搭建
# 1. 使用Sequential容器，搭建3曾网络如下
# 2. 需要乡下一层传递时，需使用activation函数
# 3. 输出层因不用向下一层传递，因此不用激活函数activation
model = keras.Sequential([ # 三个非线性层的嵌套模型，包括两个隐藏层，一个输出层
        layers.Dense(256, activation='relu'), # 隐藏层1，用relu作为激活函数
        layers.Dense(512, activation='relu'),
        layers.Dense(10)]) # 输出层不用激活函数，输出节点数为10

# 定义优化器
optimizer = optimizers.SGD(learning_rate=0.001) 

In [None]:
# 模型训练

losses = []

for epoch in range(30):
    # 第四步： 循环迭代优化
    for step, (x, y) in enumerate(train_dataset): # python 语法，enumerate()方法将train_dataset中的后两维数据以枚举形式付给step对应的(x, y)变量
        with tf.GradientTape() as tape: # python 语法，对于后续需要释放的资源，使用with...as...语句，避免忘记释放资源
            x = tf.reshape(x, (-1, 28*28)) # 打平操作
            out = model(x) # 第一步：得到模型输出, 相当于表达式中的Y
            print(out)
            # 第二步：计算平均误差
            loss = tf.square(out - y) # 计算平方和[b, 10]
            loss = tf.reduce_sum(loss) / x.shape[0] # 计算每个样本的平均误差[b]

        # 第三步：计算并优化参数[w1, w2, w3, b1, b2, b3]
        grads = tape.gradient(loss, model.trainable_variables) # 自动计算梯度
        optimizer.apply_gradients(zip(grads, model.trainable_variables)) # w'=w-lr*grad, 更新网络参数

        if step % 100 == 0: 
            print(epoch, step, 'loss:', loss.numpy())
    losses.append(float(loss))

tf.Tensor(
[[ 0.04260537 -0.06652801  0.02229998 ...  0.12273456  0.06894664
  -0.09316976]
 [ 0.9677308  -0.11070315 -0.00718247 ...  0.16903313 -0.08342849
  -0.02381123]
 [-0.19749899  0.14264616  0.0981515  ... -0.02844247  0.03783668
   0.08140032]
 ...
 [-0.02767906  0.03313285  0.3757003  ...  0.19108401  0.16262507
   0.1677037 ]
 [-0.12911844 -0.01655202  0.24885423 ... -0.00320847  0.04406442
  -0.0748364 ]
 [-0.04354261  0.0969829  -0.06851237 ... -0.10224245  0.9603305
  -0.01872438]], shape=(500, 10), dtype=float32)
0 0 loss: 0.26945922
tf.Tensor(
[[-0.00967856  0.20563492  0.28282562 ...  0.5194886  -0.08072514
  -0.14412105]
 [-0.13165976  0.05197018  0.04425717 ...  0.07175201  0.2899569
   0.10852373]
 [ 0.01680266  0.09959754  0.11641422 ... -0.04608878  0.33592036
  -0.02652988]
 ...
 [ 0.4805474   0.06382421  0.2070075  ...  0.04463937 -0.01929643
  -0.10684848]
 [ 0.20924294 -0.03714111  0.27735704 ...  0.1748316  -0.06310537
   0.14103372]
 [-0.02401364  0.2572851

   7.96614587e-02  8.50422621e-01]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[ 0.04795568 -0.08983411  0.28110927 ...  0.02305387  0.32153174
  -0.05199169]
 [ 0.03478543 -0.07247637  0.04023105 ...  0.84365803 -0.10271217
   0.07412548]
 [-0.00556857  0.05964753 -0.04974734 ...  0.38090754  0.01438827
   0.4797209 ]
 ...
 [-0.03376159  0.12622121  0.02994632 ... -0.12217082  0.22446525
  -0.08625855]
 [ 0.02891692 -0.01402402  0.11027345 ...  0.7526386  -0.17047347
   0.015109  ]
 [ 0.16106965 -0.00609533 -0.0586313  ...  0.4941695  -0.1120843
   0.20766023]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[ 0.14228983 -0.03833745  0.08065244 ...  0.09253902  0.22003356
   0.01785339]
 [ 0.04091987  0.13149697  0.02674475 ...  0.07990163  0.577821
  -0.01088493]
 [ 0.00319461 -0.04043689  0.05722983 ...  0.914701   -0.07161278
   0.16916767]
 ...
 [-0.02222845  0.21547487  0.02910098 ...  0.0530444   0.09584067
   0.04310095]
 [-0.02633951 -0.01775033  0.11884845 ...  0.08780498  0.0

   0.08814685]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-0.13666657 -0.06442435  0.4792906  ...  0.07376523  0.10020699
  -0.01632276]
 [ 0.14439194  0.06771967  0.08432651 ...  0.28763863 -0.0770161
   0.31513953]
 [ 0.05035696  0.05314668  0.04972611 ...  0.10728142  0.11490907
   0.3428849 ]
 ...
 [ 0.02219768 -0.00140223  0.05901052 ...  0.01118097 -0.02904358
  -0.03203867]
 [ 0.1788991   0.15668543  0.08553061 ...  0.01499105  0.07090242
   0.04238637]
 [-0.06176747  0.20744878  0.02730316 ...  0.15093373  0.7173653
   0.14583206]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-6.31451085e-02  1.23562396e-01  2.08057649e-02 ...  9.28358361e-02
  -7.14377016e-02  4.18393403e-01]
 [ 1.42821316e-02 -7.30407164e-02  6.12485446e-02 ...  1.96679279e-01
   2.08462954e-01  1.75223693e-01]
 [-3.40112336e-02 -6.92002475e-04  7.41833001e-02 ...  9.62480664e-01
   4.70477045e-02  2.10350513e-01]
 ...
 [ 6.83857175e-03  9.14596379e-01  6.64008260e-02 ...  1.60842836e-01
  -2.64862739e-0

   0.7579687 ]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-0.10313197  0.25672936  0.03028711 ...  0.45198423  0.06321321
   0.25017402]
 [-0.02350211  0.12954992  0.09990709 ...  0.08019663 -0.15745693
   0.01150541]
 [ 0.12762415  0.14768553 -0.04933221 ...  0.10659249  0.22407767
  -0.01984206]
 ...
 [ 1.006312   -0.07368539  0.07853264 ... -0.13665     0.10406327
   0.09162767]
 [-0.06066333  0.01389914  0.0222554  ...  0.00864465  0.30969715
   0.45447412]
 [ 0.15682739  0.08804435 -0.10075368 ... -0.05793211  0.393659
  -0.04446379]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-0.04162764 -0.02949643 -0.09921678 ...  1.2241048   0.09614623
  -0.00207007]
 [ 0.01788731 -0.13248846  0.13215578 ... -0.05335397  0.03243419
   0.6778665 ]
 [-0.06610588  0.07244713 -0.04003124 ... -0.08328521  0.04117061
   0.5643999 ]
 ...
 [ 0.11351754  0.09449301  0.02886779 ...  0.08645547  0.21010868
  -0.11012273]
 [ 0.07705767 -0.03592387 -0.08506341 ...  1.0463475  -0.12344271
  -0.115877

   0.09924023]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-0.13535756  0.07381097  0.0445392  ... -0.03463851  0.14438634
  -0.15176782]
 [ 0.89915884  0.02534803  0.03135091 ... -0.02990551  0.03248318
  -0.04376835]
 [ 0.04413981  0.20656662 -0.11564708 ...  0.06091247  0.16619751
  -0.1933334 ]
 ...
 [-0.05892574  0.24116243  0.23718795 ... -0.14336199  0.56487393
  -0.11740202]
 [ 0.04331428  0.7715754  -0.01152462 ...  0.01212831 -0.04687088
   0.02402591]
 [-0.06886122 -0.02513053 -0.03213444 ...  0.23408404  0.07726365
   0.38079196]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-0.00187817 -0.08431324  0.01634702 ... -0.0861921  -0.09607013
   0.5224226 ]
 [-0.04296574  0.39619377 -0.13465777 ... -0.00967999  0.6271397
  -0.01103066]
 [ 0.03043097  0.16566738  0.06273083 ... -0.09550731  0.04553518
   0.11296183]
 ...
 [ 0.01713843 -0.04810286 -0.01799459 ...  0.06115857 -0.08943566
   0.08764343]
 [ 0.08141416  0.03863654  0.05038915 ... -0.00095905  0.02433375
   0.16491

   0.4951166 ]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[ 2.46935889e-01  5.07602692e-02  3.79436314e-02 ... -2.04042718e-03
  -2.62929965e-02  2.25046091e-02]
 [-1.07069714e-02  8.82072747e-02 -8.73628333e-02 ...  4.25686181e-01
   1.24791395e-02  3.76085222e-01]
 [-5.47725987e-03  8.91163945e-05  2.29264479e-02 ...  1.68160751e-01
   2.55166460e-02  7.49239802e-01]
 ...
 [ 7.10700359e-03 -1.53071523e-01  1.27817070e+00 ... -1.82005391e-03
   2.19369326e-02  2.79841244e-01]
 [ 1.10289175e-03 -2.62998566e-02  6.52923405e-01 ...  3.93978685e-01
   7.12126121e-02 -8.78168344e-02]
 [ 3.75416011e-01 -6.33104220e-02  1.40286833e-01 ...  3.10120247e-02
   1.16153732e-01  2.26397663e-01]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[ 0.02446925  0.9420769   0.0459692  ... -0.01312288  0.0676736
  -0.01646104]
 [ 0.01667617 -0.07302786  0.88790846 ... -0.06098146 -0.06269448
   0.0167788 ]
 [ 0.03456745  1.0560529   0.01989321 ...  0.03506249 -0.06066341
  -0.06121304]
 ...
 [ 0.22537923

tf.Tensor(
[[-0.06835996 -0.03784159 -0.14568287 ... -0.04375358  0.37796703
  -0.12324481]
 [-0.01794073  0.95690215  0.06861046 ... -0.02738686  0.01603093
   0.02856545]
 [ 0.11113463 -0.03660833 -0.1482751  ... -0.07280153  0.55551785
   0.16308926]
 ...
 [ 0.19620283  0.24099176 -0.12570679 ...  0.07202635  0.22561587
   0.09495285]
 [ 0.11411721  1.050933    0.05312501 ...  0.05145704 -0.02295214
   0.02440826]
 [-0.09174289 -0.06244038  0.14282441 ... -0.0443489   0.08430691
  -0.04538796]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-0.03404309  0.15932381  0.47189277 ...  0.08948228  0.12854816
   0.02663163]
 [ 0.06644955  0.09857502  0.09217949 ...  0.4577874  -0.09721071
   0.02504584]
 [ 0.9991666   0.07986422  0.1075094  ...  0.09363271  0.1713511
  -0.00504723]
 ...
 [ 0.44809026  0.03828146  0.00103724 ...  0.0097165   0.31679815
   0.1343848 ]
 [ 0.01359824 -0.01156572 -0.03469281 ... -0.10299464  0.3069659
   0.24560766]
 [ 0.3612982   0.07504597  0.04853547 ...  0.

   5.8457918e-02  8.3248533e-02]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[ 0.51373845 -0.11277692  0.15051483 ...  0.24853903  0.02922346
   0.0453601 ]
 [ 0.07063654 -0.05376194 -0.08661227 ...  0.01473089  0.94379413
   0.01724329]
 [-0.03084848  0.02496799  0.4549832  ...  0.2538273  -0.12973745
   0.00416218]
 ...
 [ 0.08388225  0.08537596  0.6961512  ...  0.0959077  -0.06389438
  -0.1648919 ]
 [-0.03304642  0.7051998   0.02187295 ... -0.05845669  0.01779918
   0.02397619]
 [ 0.03648273  0.05606554  0.65558314 ... -0.01547678  0.21387435
   0.09683611]], shape=(500, 10), dtype=float32)
tf.Tensor(
[[-0.03792921  0.02477869  0.17271942 ...  0.58661103 -0.11542299
   0.1534109 ]
 [-0.04660127  0.12855513 -0.12290219 ...  0.11445462  0.20391735
   0.16803023]
 [-0.03407891 -0.0264375  -0.22396535 ... -0.02021103  0.0657421
   0.30518723]
 ...
 [ 0.02458916 -0.05315503  0.00849106 ...  0.05273586  0.00194118
  -0.03086229]
 [ 0.6921571  -0.08234737  0.03267357 ...  0.09795419  0.0

In [7]:
# 每个epoch次训练，绘制偏移量

matplotlib.rcParams['font.size'] = 20
matplotlib.rcParams['figure.titlesize'] = 20
matplotlib.rcParams['figure.figsize'] = [9, 7]
matplotlib.rcParams['font.family'] = ['STKaiTi']
matplotlib.rcParams['axes.unicode_minus']=False 


plt.figure()
plt.plot(losses, color='C0', marker='s', label='训练')
plt.xlabel('Epoch')
plt.legend()
plt.ylabel('MSE')
plt.savefig('forward.svg')