In [2]:
import tensorflow as tf
import numpy as np

In [None]:
# Build a dataset whose elements are the rows of the nested list, then print
# each element as a tensor.
dataset = tf.data.Dataset.from_tensor_slices([[1], [2], [3]])
for element in dataset:
  print(element)

In [None]:
# Show the type specification of a single element of `dataset`.
dataset.element_spec

In [None]:
# Count 0..7 and group into batches of 3; drop_remainder=True discards the
# final batch if it has fewer than 3 elements.
dataset = tf.data.Dataset.range(8).batch(3, drop_remainder=True)

In [None]:
# Materialize every element as NumPy values so the notebook displays them.
list(dataset.as_numpy_iterator())

In [None]:
# Variable-length integer sequences, one sequence per entry.
elements = [
  [0],
  [1, 2, 3, 4],
  [5, 6, 7],
  [7, 8, 9, 10, 11],
  [13, 14, 15, 16, 19, 20],
  [21, 22],
]

In [None]:
# Wrap the Python list of sequences in a dataset. `output_signature` replaces
# the deprecated `output_types` / `output_shapes` arguments; each element is
# declared as a 1-D int64 tensor of unknown length.
dataset = tf.data.Dataset.from_generator(
    lambda: elements,
    output_signature=tf.TensorSpec(shape=[None], dtype=tf.int64))

In [None]:
# Display the generated sequences as NumPy arrays.
list(dataset.as_numpy_iterator())

In [None]:
# Batch sequences of similar length together. An element's length is the size
# of its first dimension; the two boundaries [3, 5] give three length buckets,
# and each bucket is assigned a batch size of 2.
# Rebinds `dataset`, so the result depends on what `dataset` held before this
# cell ran (presumably the generator dataset from the preceding cells).
dataset = dataset.bucket_by_sequence_length(
        element_length_func=lambda elem: tf.shape(elem)[0],
        bucket_boundaries=[3, 5],
        bucket_batch_sizes=[2, 2, 2])

In [None]:
# Squares of 0..4, with the mapped elements cached for later iterations.
dataset = tf.data.Dataset.range(5).map(lambda x: x**2).cache()

In [None]:
# Display the contents of `dataset` as NumPy values.
list(dataset.as_numpy_iterator())

In [None]:
# Three endlessly repeating single-string datasets, one per word.
datasets = [
    tf.data.Dataset.from_tensors(word).repeat()
    for word in ("foo", "bar", "baz")
]

In [None]:
# NOTE(review): this lists `dataset` (singular), not the `datasets` list built
# in the previous cell — confirm that re-displaying `dataset` is intended.
list(dataset.as_numpy_iterator())

In [None]:
# Selector values 0, 1, 2, with the whole sequence repeated three times.
choice_dataset = tf.data.Dataset.range(3).repeat(3)

In [None]:
# Display the selector values.
list(choice_dataset.as_numpy_iterator())

In [None]:
# Build a dataset that takes each element from one of `datasets`, as selected
# by `choice_dataset`. `result` is only assigned here; this cell displays nothing.
result = tf.data.Dataset.choose_from_datasets(datasets, choice_dataset)

In [None]:
# Concatenate two ranges: the elements of `a` followed by the elements of `b`.
a = tf.data.Dataset.range(1, 4)  # ==> [ 1, 2, 3 ]
b = tf.data.Dataset.range(4, 8)  # ==> [ 4, 5, 6, 7 ]
ds = a.concatenate(b)
list(ds.as_numpy_iterator())

In [None]:
# Starting dataset for the enumerate example: one element per list item.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
list(dataset.as_numpy_iterator())

In [None]:
# Pair each element with a counter that starts at 5.
# Rebinds `dataset`: re-running this cell enumerates the already-enumerated data.
dataset = dataset.enumerate(start=5)
list(dataset.as_numpy_iterator())

In [None]:
# Starting dataset whose elements are the pairs from the list.
dataset = tf.data.Dataset.from_tensor_slices([(7, 8), (9, 10)])
list(dataset.as_numpy_iterator())

In [None]:
# Pair each element with a counter that starts at 3.
# Rebinds `dataset`: re-running this cell enumerates the already-enumerated data.
dataset = dataset.enumerate(start=3)
list(dataset.as_numpy_iterator())

In [None]:
# Starting dataset for the filter examples.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
list(dataset.as_numpy_iterator())

In [None]:
# Keep only the elements smaller than 3. Rebinds `dataset`, so the next cell
# that filters `dataset` operates on this already-filtered result.
dataset = dataset.filter(lambda x: x < 3)
list(dataset.as_numpy_iterator())

In [None]:
def filter_fn(x):
  """Return the result of comparing `x` with 1 for equality."""
  return tf.math.equal(x, 1)

# Applied on top of whatever `dataset` currently holds (it is rebound here).
dataset = dataset.filter(filter_fn)
list(dataset.as_numpy_iterator())

In [None]:
# Starting dataset for the flat_map example: each element is one row of 3 values.
dataset = tf.data.Dataset.from_tensor_slices(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]])
list(dataset.as_numpy_iterator())

In [None]:
# Turn each element into its own dataset of slices and flatten the results
# into a single dataset. Rebinds `dataset`.
dataset = dataset.flat_map(
    lambda x: tf.data.Dataset.from_tensor_slices(x))
list(dataset.as_numpy_iterator())

In [None]:
def gen():
  """Yield a single pair: the integer 42 and a two-row ragged tensor."""
  ragged_tensor = tf.ragged.constant([[1, 2], [3]])
  yield 42, ragged_tensor

In [None]:
# Build a dataset from `gen`, declaring the structure of each yielded element:
# a scalar int32 tensor and an int32 ragged tensor of shape (2, None).
dataset = tf.data.Dataset.from_generator(
     gen,
     output_signature=(
         tf.TensorSpec(shape=(), dtype=tf.int32),
         tf.RaggedTensorSpec(shape=(2, None), dtype=tf.int32)))

In [None]:
# Slicing a 1D tensor produces scalar tensor elements.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
# print() is required here: a notebook only displays the last expression of a
# cell, so a bare expression in the middle would be silently discarded.
print(list(dataset.as_numpy_iterator()))

# Slicing a 2D tensor produces 1D tensor elements.
dataset = tf.data.Dataset.from_tensor_slices([[1, 2], [3, 4]])
list(dataset.as_numpy_iterator())


In [None]:
# Slicing a tuple of three 1D lists: the components are sliced together, so
# each element is a tuple with one value taken from each list.
dataset = tf.data.Dataset.from_tensor_slices(([1, 2], [3, 4], [5, 6]))
list(dataset.as_numpy_iterator())

In [None]:
# Slicing a dictionary of lists: each element is a dictionary with the same
# keys ("a", "b") holding one value from each list.
dataset = tf.data.Dataset.from_tensor_slices({"a": [1, 2], "b": [3, 4]})
list(dataset.as_numpy_iterator())

In [None]:
# Slice a (features, labels) tuple so each element pairs one feature row with
# its label.
features = tf.constant([[1, 3], [2, 1], [3, 3]]) # ==> 3x2 tensor
labels = tf.constant(['A', 'B', 'A']) # ==> 1-D tensor of 3 strings, shape (3,)
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

In [None]:
# Pair features with labels by slicing each tensor into its own dataset and
# zipping the two. Uses `features` and `labels` defined in an earlier cell.
features_dataset = tf.data.Dataset.from_tensor_slices(features)
labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
dataset = tf.data.Dataset.zip((features_dataset, labels_dataset))

list(dataset.as_numpy_iterator())

In [None]:
# Features arranged as 3 batches of 2 rows with 2 values each; the explicit
# `shape` argument states the (3, 2, 2) layout.
batched_features = tf.constant([[[1, 3], [2, 3]],
                                [[2, 1], [1, 2]],
                                [[3, 3], [3, 2]]], shape=(3, 2, 2))

In [None]:
# Labels for the batched features: 3 batches of 2 labels, reshaped to
# (3, 2, 1) via the `shape` argument so each label sits in its own length-1 row.
batched_labels = tf.constant([['A', 'A'],
                              ['B', 'B'],
                              ['A', 'B']], shape=(3, 2, 1))

In [None]:
# Slice both batched tensors along their first dimension, so each element is
# one (features, labels) batch; print the batches as NumPy values.
dataset = tf.data.Dataset.from_tensor_slices((batched_features, batched_labels))
for element in dataset.as_numpy_iterator():
  print(element)

In [None]:
# from_tensors: the whole list becomes a single dataset element.
dataset = tf.data.Dataset.from_tensors([1, 2, 3])
list(dataset.as_numpy_iterator())

In [None]:
# from_tensor_slices, for contrast: each list item becomes its own element.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
list(dataset.as_numpy_iterator())

In [None]:
# from_tensors with a tuple: a single element holding both the list and 'A'.
dataset = tf.data.Dataset.from_tensors(([1, 2, 3], 'A'))
list(dataset.as_numpy_iterator())

In [None]:
# Wrap one constant tensor as a single element and repeat it twice.
example = tf.constant([1,2,3])
dataset = tf.data.Dataset.from_tensors(example).repeat(2)
list(dataset.as_numpy_iterator())



In [None]:
def preprocessing_fn(feature):
  """Placeholder preprocessing step; currently returns `feature` unchanged."""
  # ... the feature would be preprocessed here as per the use-case
  return feature

raw_features = [1, 2, 3, 4]  # raw input values, one dataset element each
# Apply `preprocessing_fn` with num_parallel_calls=4, then batch in pairs.
dataset = (tf.data.Dataset.from_tensor_slices(raw_features)
          .map(preprocessing_fn, num_parallel_calls=4)
          .batch(2))


In [None]:
# Display the contents of `dataset` as NumPy values.
list(dataset.as_numpy_iterator())

In [165]:
# Group 0..9 by parity and batch each group with `window_size`.
# The grouped dataset is only assigned; this cell displays nothing.
dataset = tf.data.Dataset.range(10)
window_size = 5
key_func = lambda x: x%2  # key is 0 for even values, 1 for odd values
# The lambda's `dataset` parameter is the per-key window and shadows the outer name.
reduce_func = lambda key, dataset: dataset.batch(window_size)
dataset = dataset.group_by_window(
          key_func=key_func,
          reduce_func=reduce_func,
          window_size=window_size)



In [171]:
dataset = tf.data.Dataset.range(10)
# print() is required here: a notebook only displays the last expression of a
# cell, so a bare expression in the middle would be silently discarded.
print(list(dataset.as_numpy_iterator()))
# Use each value as its own grouping key and batch every window with size 2.
dataset = dataset.group_by_window(
    key_func=lambda x: x,
    reduce_func=lambda _, ds: ds.batch(2),
    window_size=2)

In [174]:
# Variable-length vectors: for x in 1..4, a vector of length x filled with x.
A = (tf.data.Dataset
     .range(1, 5, output_type=tf.int32)
     .map(lambda x: tf.fill([x], x)))
list(A.as_numpy_iterator())

[array([1], dtype=int32),
 array([2, 2], dtype=int32),
 array([3, 3, 3], dtype=int32),
 array([4, 4, 4, 4], dtype=int32)]

In [184]:
# Pad a dataset of (A, A) pairs: both components are padded with -1 within
# each batch of 2.
E = tf.data.Dataset.zip((A, A)).padded_batch(2, padding_values=-1)
list(E.as_numpy_iterator())

[(array([[ 1, -1],
         [ 2,  2]], dtype=int32),
  array([[ 1, -1],
         [ 2,  2]], dtype=int32)),
 (array([[ 3,  3,  3, -1],
         [ 4,  4,  4,  4]], dtype=int32),
  array([[ 3,  3,  3, -1],
         [ 4,  4,  4,  4]], dtype=int32))]

In [177]:
# Batch pairs of vectors with no explicit padding arguments; the recorded
# output shows shorter vectors padded with 0 up to the longest in each batch.
B = A.padded_batch(2)
list(B.as_numpy_iterator())

[array([[1, 0],
        [2, 2]], dtype=int32),
 array([[3, 3, 3, 0],
        [4, 4, 4, 4]], dtype=int32)]

In [179]:
# Pad every vector to a fixed length of 5 using -1 as the fill value.
C = A.padded_batch(2, padded_shapes=5, padding_values=-1)
list(C.as_numpy_iterator())

[array([[ 1, -1, -1, -1, -1],
        [ 2,  2, -1, -1, -1]], dtype=int32),
 array([[ 3,  3,  3, -1, -1],
        [ 4,  4,  4,  4, -1]], dtype=int32)]

In [182]:
# Two-component elements whose components have different lengths.
elements = [([1, 2, 3], [10]),
            ([4, 5], [11, 12])]
# `output_signature` replaces the deprecated positional `output_types`
# argument and declares both components as 1-D int32 tensors of unknown length.
dataset = tf.data.Dataset.from_generator(
    lambda: iter(elements),
    output_signature=(
        tf.TensorSpec(shape=[None], dtype=tf.int32),
        tf.TensorSpec(shape=[None], dtype=tf.int32)))

list(dataset.as_numpy_iterator())


[(array([1, 2, 3], dtype=int32), array([10], dtype=int32)),
 (array([4, 5], dtype=int32), array([11, 12], dtype=int32))]

In [183]:
# Pad per component: the first is padded to length 4 with -1; the second uses
# [None] for its padded shape and is filled with 100.
# Rebinds `dataset`, so this cell expects the two-component dataset built earlier.
dataset = dataset.padded_batch(2,
    padded_shapes=([4], [None]),
    padding_values=(-1, 100))
list(dataset.as_numpy_iterator())

[(array([[ 1,  2,  3, -1],
         [ 4,  5, -1, -1]], dtype=int32),
  array([[ 10, 100],
         [ 11,  12]], dtype=int32))]

In [187]:
# Count 0..2 with a prefetch buffer of 2; the elements themselves are unchanged.
dataset = tf.data.Dataset.range(3).prefetch(2)
list(dataset.as_numpy_iterator())


[0, 1, 2]

In [190]:
# Two random datasets created with the same seed should yield the same values.
ds1 = tf.data.Dataset.random(seed=4).take(10)
ds2 = tf.data.Dataset.random(seed=4).take(10)
# Compare ds1 against ds2; comparing ds2 with itself would leave ds1 unused
# and would not test that the two seeded datasets agree.
print(list(ds1.as_numpy_iterator())==list(ds2.as_numpy_iterator()))



True


In [6]:
# Show the effect of shuffle followed by repeat, printing after each step.
dataset = tf.data.Dataset.range(3)
print(list(dataset.as_numpy_iterator()))
# NOTE(review): no `seed` is passed, so the printed orders are presumably not
# reproducible between runs — confirm whether a fixed seed is wanted.
dataset = dataset.shuffle(3, reshuffle_each_iteration=True)
print(list(dataset.as_numpy_iterator()))
dataset = dataset.repeat(2)
print(list(dataset.as_numpy_iterator()))


[0, 1, 2]
[2, 1, 0]
[2, 1, 0, 1, 0, 2]


In [8]:
# Count 0..9 and skip the first 7 elements.
dataset = tf.data.Dataset.range(10).skip(7)
list(dataset.as_numpy_iterator())


[7, 8, 9]

In [10]:
# Variable-length "batches" to be flattened by unbatch().
elements = [ [1, 2, 3], [1, 2], [1, 2, 3, 4] ]
# `output_signature` replaces the deprecated positional `output_types`
# argument; each element is a 1-D int64 tensor of unknown length.
dataset = tf.data.Dataset.from_generator(
    lambda: elements,
    output_signature=tf.TensorSpec(shape=[None], dtype=tf.int64))
list(dataset.as_numpy_iterator())


[array([1, 2, 3]), array([1, 2]), array([1, 2, 3, 4])]

In [11]:
# Split every element along its first dimension into individual elements.
# Rebinds `dataset`, so re-running this cell unbatches the already-unbatched data.
dataset = dataset.unbatch()
list(dataset.as_numpy_iterator())


[1, 2, 3, 1, 2, 1, 2, 3, 4]

In [12]:
# Drop duplicate values, then sort the remaining ones for display.
dataset = tf.data.Dataset.from_tensor_slices([1, 37, 2, 37, 2, 1]).unique()
sorted(dataset.as_numpy_iterator())

[1, 2, 37]

In [15]:
# Split 0..6 into windows of up to 3 elements. Each window is iterated with
# its own as_numpy_iterator() and printed as a list.
dataset = tf.data.Dataset.range(7).window(3)
for window in dataset:
  print(list(window.as_numpy_iterator()))

[0, 1, 2]
[3, 4, 5]
[6]


2022-05-24 15:53:23.714519: W tensorflow/core/framework/dataset.cc:768] Input of Window will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.
