## TensorFlow读取数据的Dataset API

In [1]:
import tensorflow as tf

### 1. 读取数据构造Dataset

#### 读取列表

In [10]:
# A nested list is sliced along its first axis, so each inner pair
# becomes one element of the dataset.
datas = [
    [1, 2],
    [3, 4],
    [5, 6],
]
dataset = tf.data.Dataset.from_tensor_slices(datas)
dataset

<TensorSliceDataset shapes: (2,), types: tf.int32>

In [11]:
# Iterating the dataset directly yields one tf.Tensor per slice.
for row in dataset:
    print(row)

tf.Tensor([1 2], shape=(2,), dtype=int32)
tf.Tensor([3 4], shape=(2,), dtype=int32)
tf.Tensor([5 6], shape=(2,), dtype=int32)


#### 读取字典

In [12]:
# Dict input: each key is sliced independently, so the i-th dataset element
# is a dict holding the i-th value stored under every key.
datas = {
    "a": [1, 2, 4],
    "b": [4, 5, 6],
    "c": [7, 8, 9],
}
dataset = tf.data.Dataset.from_tensor_slices(datas)
dataset

<TensorSliceDataset shapes: {a: (), b: (), c: ()}, types: {a: tf.int32, b: tf.int32, c: tf.int32}>

In [20]:
# as_numpy_iterator() yields NumPy values instead of tf.Tensor objects,
# so each dict element prints with plain scalar values.
for data in dataset.as_numpy_iterator():
    print(data)

{'a': 1, 'b': 4, 'c': 7}
{'a': 2, 'b': 5, 'c': 8}
{'a': 4, 'b': 6, 'c': 9}


#### 读取元组

In [22]:
# Tuple input: features and labels are sliced in lockstep, so each dataset
# element is a (feature_row, label) pair.
features = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
labels = [1, 0, 1]

dataset = tf.data.Dataset.from_tensor_slices(
    (features, labels)
)
dataset

<TensorSliceDataset shapes: ((3,), ()), types: (tf.int32, tf.int32)>

In [26]:
# Each element comes back as a (features array, label) tuple of NumPy values.
for data in dataset.as_numpy_iterator():
    print(data)

(array([1, 2, 3]), 1)
(array([4, 5, 6]), 0)
(array([7, 8, 9]), 1)


### 2. 对Dataset执行各种转换

In [41]:
# Base dataset for the transformation examples below: the integers 0..19.
datas = [*range(20)]
dataset = tf.data.Dataset.from_tensor_slices(datas)
list(dataset.as_numpy_iterator())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

#### map操作

In [42]:
# map() applies the function to every element; here each value is incremented by 1.
dataset_map = dataset.map(lambda value: value + 1)
list(dataset_map.as_numpy_iterator())

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

#### batch操作

In [43]:
# batch() groups consecutive elements into arrays of up to 3 items; the final
# batch keeps the leftover elements because drop_remainder is left at its default.
dataset_batch = dataset.batch(batch_size=3)
list(dataset_batch.as_numpy_iterator())

[array([0, 1, 2]),
 array([3, 4, 5]),
 array([6, 7, 8]),
 array([ 9, 10, 11]),
 array([12, 13, 14]),
 array([15, 16, 17]),
 array([18, 19])]

#### repeat操作

In [None]:
# repeat(3) yields the whole dataset three times back to back, so the
# 20-element base dataset produces 60 elements (0..19, 0..19, 0..19).
dataset_repeat = dataset.repeat(3)
# Display the repeated dataset itself so the output matches this section.
list(dataset_repeat.as_numpy_iterator())