Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset doc updated #27933

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
80 changes: 35 additions & 45 deletions tensorflow/python/data/ops/dataset_ops.py
Expand Up @@ -566,27 +566,25 @@ def zip(datasets):
For example:

```python
# NOTE: The following examples use `{ ... }` to represent the
# contents of a dataset.
a = { 1, 2, 3 }
b = { 4, 5, 6 }
c = { (7, 8), (9, 10), (11, 12) }
d = { 13, 14 }
a = Dataset.range(1, 4) # ==> [ 1, 2, 3 ]
b = Dataset.range(4, 7) # ==> [ 4, 5, 6 ]
c = Dataset.range(7, 13).batch(2) # ==> [ [7, 8], [9, 10], [11, 12] ]
d = Dataset.range(13, 15) # ==> [ 13, 14 ]

# The nested structure of the `datasets` argument determines the
# structure of elements in the resulting dataset.
Dataset.zip((a, b)) == { (1, 4), (2, 5), (3, 6) }
Dataset.zip((b, a)) == { (4, 1), (5, 2), (6, 3) }
Dataset.zip((a, b)) # ==> [ (1, 4), (2, 5), (3, 6) ]
Dataset.zip((b, a)) # ==> [ (4, 1), (5, 2), (6, 3) ]

# The `datasets` argument may contain an arbitrary number of
# datasets.
Dataset.zip((a, b, c)) == { (1, 4, (7, 8)),
(2, 5, (9, 10)),
(3, 6, (11, 12)) }
Dataset.zip((a, b, c)) # ==> [ (1, 4, [7, 8]),
# (2, 5, [9, 10]),
# (3, 6, [11, 12]) ]

# The number of elements in the resulting dataset is the same as
# the size of the smallest dataset in `datasets`.
Dataset.zip((a, d)) == { (1, 13), (2, 14) }
Dataset.zip((a, d)) # ==> [ (1, 13), (2, 14) ]
```

Args:
Expand All @@ -601,18 +599,16 @@ def concatenate(self, dataset):
"""Creates a `Dataset` by concatenating given dataset with this dataset.

```python
# NOTE: The following examples use `{ ... }` to represent the
# contents of a dataset.
a = { 1, 2, 3 }
b = { 4, 5, 6, 7 }
a = Dataset.range(1, 4) # ==> [ 1, 2, 3 ]
b = Dataset.range(4, 8) # ==> [ 4, 5, 6, 7 ]

# The input dataset and the dataset to be concatenated should have the same
# nested structures and output types.
# c = { (8, 9), (10, 11), (12, 13) }
# d = { 14.0, 15.0, 16.0 }
# c = Dataset.range(8, 14).batch(2) # ==> [ [8, 9], [10, 11], [12, 13] ]
# d = Dataset.from_tensor_slices([14.0, 15.0, 16.0])
# a.concatenate(c) and a.concatenate(d) would result in an error.

a.concatenate(b) == { 1, 2, 3, 4, 5, 6, 7 }
a.concatenate(b) # ==> [ 1, 2, 3, 4, 5, 6, 7 ]
```

Args:
Expand Down Expand Up @@ -936,17 +932,17 @@ def map(self, map_func, num_parallel_calls=None):
For example:

```python
# NOTE: The following examples use `{ ... }` to represent the
# contents of a dataset.
a = { 1, 2, 3, 4, 5 }
a = Dataset.range(1, 6) # ==> [ 1, 2, 3, 4, 5 ]

a.map(lambda x: x + 1) = { 2, 3, 4, 5, 6 }
a.map(lambda x: x + 1) # ==> [ 2, 3, 4, 5, 6 ]
```

The input signature of `map_func` is determined by the structure of each
element in this dataset. For example:

```python
# NOTE: The following examples use `{ ... }` to represent the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you introduce this comment here? I thought the purpose of this PR was to replace `{ ... }` with code that runs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are several examples following this line (lines 944–961) that I didn't change in this request and that still contain non-executable code. I wanted to keep this request small because I expected some feedback. I'll take another look in a separate request.

# contents of a dataset.
# Each element is a `tf.Tensor` object.
a = { 1, 2, 3, 4, 5 }
# `map_func` takes a single argument of type `tf.Tensor` with the same
Expand Down Expand Up @@ -1032,12 +1028,10 @@ def flat_map(self, map_func):
dataset of their elements:

```python
# NOTE: The following examples use `{ ... }` to represent the
# contents of a dataset. '[...]' represents a tensor.
a = {[1,2,3,4,5], [6,7,8,9], [10]}
a = Dataset.from_tensor_slices([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ])

a.flat_map(lambda x: Dataset.from_tensor_slices(x)) ==
{[1,2,3,4,5,6,7,8,9,10]}
a.flat_map(lambda x: Dataset.from_tensor_slices(x + 1)) # ==>
# [ 2, 3, 4, 5, 6, 7, 8, 9, 10 ]
```

`tf.data.Dataset.interleave()` is a generalization of `flat_map`, since
Expand Down Expand Up @@ -1088,24 +1082,20 @@ def interleave(self,
For example:

```python
# NOTE: The following examples use `{ ... }` to represent the
# contents of a dataset.
a = { 1, 2, 3, 4, 5 }
a = Dataset.range(1, 6) # ==> [ 1, 2, 3, 4, 5 ]

# NOTE: New lines indicate "block" boundaries.
a.interleave(lambda x: Dataset.from_tensors(x).repeat(6),
cycle_length=2, block_length=4) == {
1, 1, 1, 1,
2, 2, 2, 2,
1, 1,
2, 2,
3, 3, 3, 3,
4, 4, 4, 4,
3, 3,
4, 4,
5, 5, 5, 5,
5, 5,
}
cycle_length=2, block_length=4) # ==> [1, 1, 1, 1,
# 2, 2, 2, 2,
# 1, 1,
# 2, 2,
# 3, 3, 3, 3,
# 4, 4, 4, 4,
# 3, 3,
# 4, 4,
# 5, 5, 5, 5,
# 5, 5]
```

NOTE: The order of elements yielded by this transformation is
Expand Down Expand Up @@ -1143,13 +1133,13 @@ def filter(self, predicate):
```python
d = tf.data.Dataset.from_tensor_slices([1, 2, 3])

d = d.filter(lambda x: x < 3) # [1, 2]
d = d.filter(lambda x: x < 3) # ==> [1, 2]

# `tf.math.equal(x, y)` is required for equality comparison
def filter_fn(x):
  return tf.math.equal(x, 1)

d = d.filter(filter_fn) # [1]
d = d.filter(filter_fn) # ==> [1]
```

Args:
Expand Down