In [2]:
import tensorflow as tf
from tensorflow import keras

In [3]:
# mnist
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data() # 返回numpy
x.shape
y.shape
x.min()
y_onehot = tf.one_hot(y, depth=10)

In [4]:
# fashion mnist
(x, y), (x_test, y_test) = keras.datasets.fashion_mnist.load_data() # 返回numpy
print(f"x shape:{x.shape}, y shape:{y.shape}")

x shape:(60000, 28, 28), y shape:(60000,)


In [5]:
from keras import layers, optimizers, Sequential, metrics

def preprocess(x, y):

    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x,y

batchsz = 128
db = tf.data.Dataset.from_tensor_slices((x,y))
db = db.map(preprocess).shuffle(10000).batch(batchsz)
db_iter = iter(db)
sample = next(db_iter)
print('batch:', sample[0].shape, sample[1].shape)

db_test = tf.data.Dataset.from_tensor_slices((x_test,y_test))
db_test = db_test.map(preprocess).batch(batchsz)


batch: (128, 28, 28) (128,)


In [6]:
model = Sequential([
    layers.Dense(256, activation=tf.nn.relu), # [b, 784] => [b, 256]
    layers.Dense(128, activation=tf.nn.relu), # [b, 256] => [b, 128]
    layers.Dense(64, activation=tf.nn.relu), # [b, 128] => [b, 64]
    layers.Dense(32, activation=tf.nn.relu), # [b, 64] => [b, 32]
    layers.Dense(10) # [b, 32] => [b, 10], 330 = 32*10 + 10
])
model.build(input_shape=[None, 28*28])
model.summary()
# w = w - lr*grad
# Adam是最好的优化器，如果你想花更少的时间更有效地训练神经网络那就用Adam。
# 对于稀疏的数据，使用动态的学习率。
# 如果你想用梯度下降算法，mini-batch梯度下降是最好的选择。
optimizer = optimizers.Adam(learning_rate=1e-3)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               200960    
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 10)                330       
                                                                 
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________


In [7]:
for epoch in range(30):


    for step, (x,y) in enumerate(db):

        # x: [b, 28, 28] => [b, 784]
        # y: [b]
        x = tf.reshape(x, [-1, 28*28])

        with tf.GradientTape() as tape:
            # [b, 784] => [b, 10]
            logits = model(x)
            y_onehot = tf.one_hot(y, depth=10)
            # [b]
            loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
            loss_ce = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
            loss_ce = tf.reduce_mean(loss_ce)

        grads = tape.gradient(loss_ce, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))


        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss_ce), float(loss_mse))


    # test
    total_correct = 0
    total_num = 0
    for x,y in db_test:

        # x: [b, 28, 28] => [b, 784]
        # y: [b]
        x = tf.reshape(x, [-1, 28*28])
        # [b, 10]
        logits = model(x)
        # logits => prob, [b, 10]
        prob = tf.nn.softmax(logits, axis=1)
        # [b, 10] => [b], int64
        pred = tf.argmax(prob, axis=1)
        pred = tf.cast(pred, dtype=tf.int32)
        # pred:[b]
        # y: [b]
        # correct: [b], True: equal, False: not equal
        correct = tf.equal(pred, y)
        correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))

        total_correct += int(correct)
        total_num += x.shape[0]

    acc = total_correct / total_num
    print(epoch, 'test acc:', acc)

0 0 loss: 2.365816116333008 0.17254051566123962
0 100 loss: 0.6091817617416382 17.28742027282715
0 200 loss: 0.4815180003643036 19.865001678466797
0 300 loss: 0.4163750410079956 20.360986709594727
0 400 loss: 0.3302662670612335 23.38900375366211
0 test acc: 0.8344
1 0 loss: 0.4726368188858032 23.090065002441406
1 100 loss: 0.45495569705963135 27.5794677734375
1 200 loss: 0.33877336978912354 26.329254150390625
1 300 loss: 0.4583154618740082 22.437856674194336
1 400 loss: 0.45800724625587463 24.718666076660156
1 test acc: 0.8672
2 0 loss: 0.365939736366272 25.27383041381836
2 100 loss: 0.44438666105270386 23.356464385986328
2 200 loss: 0.4254318177700043 31.04400634765625
2 300 loss: 0.2957124412059784 35.90864944458008
2 400 loss: 0.2693067789077759 30.904319763183594
2 test acc: 0.869
3 0 loss: 0.17273060977458954 40.43163299560547
3 100 loss: 0.4020720422267914 36.184425354003906
3 200 loss: 0.21347549557685852 36.93004608154297
3 300 loss: 0.3364000916481018 32.04872131347656
3 400 l

### cifar10/100
cifar10 和 cifar100是相同的数据集  只是标签不同，10是大类， 100是小类

![cifar10](images/cifar10.png)

In [8]:
# cifar10/100
(x, y), (x_test, y_test) = keras.datasets.cifar10.load_data()
x.shape
y.shape
x_test.shape
y_test.shape



(10000, 1)

In [9]:
# tf.data.Dataset
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
next(iter(db))[0].shape

# .shuffle
db = db.shuffle(10000)

# .map
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y

db2 = db.map(preprocess)
res = next(iter(db2))
res[0].shape, res[1].shape

# .batch
db3 = db2.batch(32)
res=next(iter(db3))
res[0].shape, res[1].shape

# StopIteration
# 通过try catch不停的迭代
# db_iter = iter(db3)
# while True:
#     next(db_iter)


# .repeat
db4 = db3.repeat()# 数据无限循环
db4 = db3.repeat(2)#  数据重复n次

In [10]:
# Example
def prepare_minst_features_and_labels(X, y):
    X = tf.cast(X, tf.float32) / 255.
    y = tf.cast(y, tf.int64)
    return X, y

def mnist_dataset():
    (x, y), (x_val, y_val) = keras.datasets.fashion_mnist.load_data()
    y = tf.one_hot(y, depth=10)
    y_val = tf.one_hot(y_val, depth=10)

    ds = tf.data.Dataset.from_tensor_slices((x, y))
    ds = ds.map(prepare_minst_features_and_labels)
    ds = ds.shuffle(60000).batch(100)
    ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    ds_val = ds_val.map(prepare_minst_features_and_labels)
    ds_val = ds_val.shuffle(10000).batch(100)
    return ds, ds_val