In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Slicing along the first axis of a [1, 10] tensor gives elements of shape (10,).
a = tf.random_uniform(shape=[1, 10], dtype=tf.float32)
dataset1 = tf.data.Dataset.from_tensor_slices(a)

# A tuple of tensors is sliced component-wise: each element pairs a float32
# scalar with an int32 vector of length 100.
scalar_component = tf.random_uniform([4])
vector_component = tf.random_uniform([4, 100], maxval=100, dtype=tf.int32)
dataset2 = tf.data.Dataset.from_tensor_slices((scalar_component, vector_component))

# Zipping nests the two element structures: (dataset1 element, dataset2 element).
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))

# Report each dataset's element structure: types first, then shapes.
for ds in (dataset1, dataset2, dataset3):
    print(ds.output_types)
    print(ds.output_shapes)

<dtype: 'float32'>
(10,)
(tf.float32, tf.int32)
(TensorShape([]), TensorShape([Dimension(100)]))
(tf.float32, (tf.float32, tf.int32))
(TensorShape([Dimension(10)]), (TensorShape([]), TensorShape([Dimension(100)])))


In [3]:
# A dict of tensors is also accepted; the dataset's types and shapes are then
# reported as dicts with the same keys.
named_components = {
    "a": tf.random_uniform([4]),
    "b": tf.random_uniform([4, 100], maxval=100),
}
dataset = tf.data.Dataset.from_tensor_slices(named_components)

for structure in (dataset.output_types, dataset.output_shapes):
    print(structure)

{'a': tf.float32, 'b': tf.float32}
{'a': TensorShape([]), 'b': TensorShape([Dimension(100)])}


In [4]:
# One-shot iterator: used directly, without running any initializer op.
dataset = tf.data.Dataset.range(100)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# This session is shared with the cells that follow.
sess = tf.Session()

# Each run of next_element should yield the next integer of the range.
for expected in range(100):
    value = sess.run(next_element)
    assert value == expected

In [5]:
# Initializable iterator: the range's upper bound is a placeholder, so the
# value is supplied each time the iterator's initializer is run.
max_value = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(max_value)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

# Initialize with a bound of 10, then again with 100, and verify that every
# pass yields 0, 1, ..., bound - 1 in order.
for upper_bound in (10, 100):
    sess.run(iterator.initializer, feed_dict={max_value: upper_bound})
    for expected in range(upper_bound):
        value = sess.run(next_element)
        assert value == expected

In [6]:
# Reinitializable iterator: it is built from an element structure (types and
# shapes) rather than from one dataset, and gets one initializer op per dataset.
training_dataset = tf.data.Dataset.range(100).map(
    lambda x: x + tf.random_uniform([], -10, 10, tf.int64))
validate_dataset = tf.data.Dataset.range(50)

iterator = tf.data.Iterator.from_structure(
    training_dataset.output_types,
    training_dataset.output_shapes,
)
next_element = iterator.get_next()

training_init_op = iterator.make_initializer(training_dataset)
validation_init_op = iterator.make_initializer(validate_dataset)

# (initializer op, number of elements to pull) for each phase of an epoch:
# training first, then validation.
epoch_phases = (
    (training_init_op, 100),
    (validation_init_op, 50),
)

# Run 20 epochs; in each one, point the iterator at a dataset and then read
# that dataset's elements through the shared next_element tensor.
for _epoch in range(20):
    for init_op, num_elements in epoch_phases:
        sess.run(init_op)
        for _step in range(num_elements):
            sess.run(next_element)

In [None]:
# Consume a dataset until it is exhausted, using OutOfRangeError as the
# end-of-data signal.
dataset = tf.data.Dataset.range(5)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

# `result` adds the next element to itself.
result = tf.add(next_element, next_element)

sess.run(iterator.initializer)

exhausted = False
while not exhausted:
    try:
        summed = sess.run(result)
    except tf.errors.OutOfRangeError:
        print("End of dataset")
        exhausted = True
    else:
        print(summed)
        

In [None]:
# Build a dataset directly from in-memory NumPy arrays.
num_examples = 100
features = np.random.normal(size=(num_examples, 10))
labels = np.random.binomial(n=1, p=.5, size=num_examples)

# Each element is a (feature row, label) pair sliced from the two arrays.
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# Show the first five elements.
for _step in range(5):
    value = sess.run(next_element)
    print(value)

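The method above embeds `features` and `labels` in the graph as `tf.constant` operations, which is memory-intensive. It is better to define the dataset in terms of `tf.placeholder` tensors and feed the NumPy arrays when the iterator is initialized.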

In [7]:
# Requires the NumPy arrays `features` and `labels` from the previous cell;
# running this cell without them raises NameError.
# The placeholders copy each array's dtype and shape; the arrays themselves
# are supplied only when the iterator is initialized.
features_ph = tf.placeholder(features.dtype, features.shape)
labels_ph = tf.placeholder(labels.dtype, labels.shape)

# Slice the placeholders into (feature row, label) pairs and group them in twos.
dataset = tf.data.Dataset.from_tensor_slices((features_ph, labels_ph)).batch(2)

iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

array_feed = {features_ph: features, labels_ph: labels}
sess.run(iterator.initializer, feed_dict=array_feed)

# Show the first five batches.
for _batch_index in range(5):
    value = sess.run(next_element)
    print(value)

NameError: name 'features' is not defined

In [None]:
# padded_batch groups variable-length elements into rectangular batches.
# With padded_shapes=[None], every element is padded to the length of the
# longest element in its own batch.
# Element x of the source range is mapped to a vector holding x copies of x.
dataset = (
    tf.data.Dataset.range(100)
    .map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))
    .padded_batch(4, padded_shapes=[None])
)

iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# First batch  ==> [[0, 0, 0], [1, 0, 0], [2, 2, 0], [3, 3, 3]]
# Second batch ==> [[4, 4, 4, 4, 0, 0, 0],
#                   [5, 5, 5, 5, 5, 0, 0],
#                   [6, 6, 6, 6, 6, 6, 0],
#                   [7, 7, 7, 7, 7, 7, 7]]
for _batch_index in range(2):
    print(sess.run(next_element))