In [1]:
import tensorflow as tf
from tensorflow import keras

#### keras.datasets
#### tf.data.Dataset.from_tensor_slices
##### shuffle, map, batch, repeat

In [2]:
# mnist

(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()

In [3]:
x.shape

(60000, 28, 28)

In [4]:
y.shape

(60000,)

In [5]:
x.min(),x.max(),x.mean()

(0, 255, 33.318421449829934)

In [6]:
x_test.shape, y_test.shape

((10000, 28, 28), (10000,))

In [7]:
y[:4]

array([5, 0, 4, 1], dtype=uint8)

In [8]:
y_onehot = tf.one_hot(y, depth=10)

In [9]:
y_onehot[:2]

<tf.Tensor: id=8, shape=(2, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

In [10]:
# CIFAR 10/100

(x, y), (x_test, y_test) = keras.datasets.cifar10.load_data()

In [11]:
x.shape, y.shape, x_test.shape, y_test.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [12]:
x.min(), x.max()

(0, 255)

In [13]:
y[:4]

array([[6],
       [9],
       [9],
       [4]], dtype=uint8)

In [14]:
# 转换为tensor和可迭代数据

db=tf.data.Dataset.from_tensor_slices(x_test)

In [15]:
next(iter(db)).shape

TensorShape([32, 32, 3])

In [16]:
db=tf.data.Dataset.from_tensor_slices((x_test, y_test))

In [17]:
next(iter(db))[0].shape

TensorShape([32, 32, 3])

In [18]:
# 要把数据顺序打散， x 和 y 对应之后打散

db = db.shuffle(10000)

### map

In [19]:
# 预处理

def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32)/255.
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y

In [20]:
db2 = db.map(preprocess)

In [21]:
res = next(iter(db2))

In [22]:
res[0].shape, res[1].shape
print(res)

(<tf.Tensor: id=52, shape=(32, 32, 3), dtype=float32, numpy=
array([[[0.        , 0.        , 0.        ],
        [0.00392157, 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        ...,
        [0.        , 0.01176471, 0.01176471],
        [0.        , 0.01176471, 0.01176471],
        [0.00392157, 0.        , 0.01568628]],

       [[0.00392157, 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.00392157, 0.        ],
        ...,
        [0.        , 0.00392157, 0.00392157],
        [0.        , 0.00392157, 0.00392157],
        [0.        , 0.        , 0.00784314]],

       [[0.00392157, 0.        , 0.        ],
        [0.        , 0.00392157, 0.00392157],
        [0.        , 0.00784314, 0.00392157],
        ...,
        [0.01176471, 0.        , 0.        ],
        [0.00392157, 0.        , 0.        ],
        [0.        , 0.00392157, 0.        ]],

       ...,

       [[0.        , 0.        , 0.        ],
       

In [23]:
res[1][:2]

<tf.Tensor: id=59, shape=(1, 10), dtype=float32, numpy=array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

### batch

In [24]:
db3 = db2.batch(32)

In [25]:
res = next(iter(db3))

In [26]:
res[0].shape, res[1].shape

# 可以用 squeeze 把 y 变成 [32, 10]

(TensorShape([32, 32, 32, 3]), TensorShape([32, 1, 10]))

## .repeat()