In [1]:
import tensorflow as tf
import os
import numpy as np

print(tf.__version__)

2.0.0


In [2]:
# 导入数据
from tensorflow.keras.datasets import fashion_mnist

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

def data_scale(x, y):
    x = tf.cast(x, tf.float32)
    x = tf.squeeze(tf.reshape(x, shape=(-1, 1)))
    x = x / 255.0
    y = tf.cast(y, tf.float32)
    return x, y
    
    
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(64).map(data_scale).batch(64)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(64).map(data_scale).batch(64)

In [3]:
# drop_out函数编写
from tensorflow import keras, nn,losses
from tensorflow.keras.layers import Dropout, Flatten, Dense

def dropout(X, drop_prob=0.1):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # 这种情况下把全部元素都丢弃
    if keep_prob == 0:
        return tf.zeros_like(X)
    
    mask = tf.random.uniform(shape=X.shape, minval=0, maxval=1) < keep_prob   # 此处解释一下：使用0-1区间均匀分布的x.shape的矩阵元素小于 keepprob生成的的True or False矩阵
    # 注意这里是数乘
#     print(mask)
    # 使用 0,1 矩阵数乘计算，然后使用 1/ keep_prob 尺度进行拉伸
    #初始mask为一个bool型数组，故需要强制类型转换，这个和其他框架不一样
    after_dropout = tf.cast(mask, dtype=tf.float32) * tf.cast(X, dtype=tf.float32) / keep_prob
    return after_dropout

In [4]:
X = tf.reshape(tf.range(0, 16), shape=(2, 8))
print(X)
dropout(X, 0)

tf.Tensor(
[[ 0  1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]], shape=(2, 8), dtype=int32)


<tf.Tensor: id=72, shape=(2, 8), dtype=float32, numpy=
array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11., 12., 13., 14., 15.]], dtype=float32)>

In [5]:
dropout(X, 0.5)

<tf.Tensor: id=86, shape=(2, 8), dtype=float32, numpy=
array([[ 0.,  2.,  0.,  6.,  8., 10., 12., 14.],
       [16., 18., 20., 22., 24., 26., 28., 30.]], dtype=float32)>

In [6]:
dropout(X, 1.0)

<tf.Tensor: id=87, shape=(2, 8), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]])>

In [7]:
# 定义模型参数
W1 = tf.Variable(tf.random.normal(stddev=0.1,shape=(784, 128)))
B1 = tf.Variable(tf.random.normal(stddev=0.1, shape=(1,128)))
W2 = tf.Variable(tf.random.normal(stddev=0.1,shape=(128, 64)))
B2= tf.Variable(tf.random.normal(stddev=0.1, shape=(1,64)))
W3 = tf.Variable(tf.random.normal(stddev=0.1,shape=(64, 10)))
B3 = tf.Variable(tf.random.normal(stddev=0.1, shape=(1,10)))

params = [W1, B1, W2, B2, W3, B3]

In [8]:
# 定义模型
def net(X, training=True, drop_rate=0.2):
    X = tf.nn.relu(X @ W1 + B1)
    if training:
        X = dropout(X, drop_rate)
    X = tf.nn.relu(X @ W2 + B2)
    if training:
        X = dropout(X, drop_rate)
    X = X @ W3 + B3
    return tf.math.softmax(X)

In [9]:
# 定义损失函数
def cross_entropy(y, y_hat):
    y = tf.cast(y, tf.int32)
    y = tf.one_hot(y,axis=-1,depth=10)
    y = tf.cast(y,dtype=tf.float32)
#     print(y[0], y_hat[0])
    y_hat = tf.cast(tf.reshape(y_hat, shape=(y.shape)), dtype=tf.float32)
    l = (-1) * y * tf.math.log(y_hat)# 这个会出现 log(0)导致 loss出现nan，所以在后面加一个小常数
    l = (-1) * y * tf.math.log(y_hat + 1e-10)
#     print(l)
    loss = tf.reduce_sum(l) / y.shape[0]
#     print(loss)
    return loss

In [10]:
# 定义评估函数
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0, 0.0
    for x, y in data_iter:
        yhat = net(x)
        acc_sum += tf.reduce_sum(tf.cast(tf.argmax(yhat, axis=1) == tf.cast(y, dtype=tf.int64), dtype=tf.float32))
#         n += y.shape[0]
#     return acc_sum / n  #这个更合理
    return acc_sum

In [11]:
# 训练模型
origin_acc = evaluate_accuracy(test_db, net)
origin_acc

<tf.Tensor: id=8172, shape=(), dtype=float32, numpy=763.0>

In [12]:
# 训练模型
epoches = 20
lr = 0.001
trainer = tf.keras.optimizers.Adam(lr=lr)

def train_model(train_data=None):
    for epoch in range(epoches):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0.0
        for x, y in train_data:
            with tf.GradientTape() as tp:
                y_hat = net(x)
                loss = cross_entropy(y, y_hat)
            grads = tp.gradient(loss, params)
            # 更新梯度
            trainer.apply_gradients(zip(grads, params))
            train_acc_sum += tf.reduce_sum(tf.cast(tf.argmax(y_hat, axis=1) == tf.cast(y, dtype=tf.int64), dtype=tf.float32))
            train_l_sum += loss
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_db, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
        
train_model(train_db)

epoch 1, loss 0.0096, train acc 0.779, test acc 8289.000
epoch 2, loss 0.0067, train acc 0.846, test acc 8355.000
epoch 3, loss 0.0061, train acc 0.860, test acc 8472.000
epoch 4, loss 0.0057, train acc 0.866, test acc 8555.000
epoch 5, loss 0.0055, train acc 0.873, test acc 8521.000
epoch 6, loss 0.0053, train acc 0.878, test acc 8593.000
epoch 7, loss 0.0051, train acc 0.882, test acc 8647.000
epoch 8, loss 0.0049, train acc 0.884, test acc 8659.000
epoch 9, loss 0.0048, train acc 0.887, test acc 8659.000
epoch 10, loss 0.0047, train acc 0.889, test acc 8670.000
epoch 11, loss 0.0046, train acc 0.892, test acc 8698.000
epoch 12, loss 0.0045, train acc 0.892, test acc 8689.000
epoch 13, loss 0.0044, train acc 0.893, test acc 8725.000
epoch 14, loss 0.0044, train acc 0.896, test acc 8717.000
epoch 15, loss 0.0042, train acc 0.899, test acc 8687.000
epoch 16, loss 0.0042, train acc 0.900, test acc 8747.000
epoch 17, loss 0.0042, train acc 0.900, test acc 8738.000
epoch 18, loss 0.0041, 

In [13]:
# 简洁实现
from tensorflow import keras

model = keras.Sequential([
#     keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128,activation='relu'),
    Dropout(0.2),
    keras.layers.Dense(64,activation='relu'),
    Dropout(0.5),
    keras.layers.Dense(10,activation=tf.nn.softmax)
])

In [14]:
# 训练模型
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.001),
              loss = 'sparse_categorical_crossentropy',   # 没有one-hot所以选sparse
              metrics=['accuracy'])
model.fit_generator(train_db,epochs=5,validation_data=test_db)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1c77354e2b0>