In [2]:
import tensorflow as tf

In [3]:
tf.__version__

'1.4.0'

In [4]:
sess = tf.Session()
sess.run(tf.random_uniform([4, 10]))

array([[ 0.49206603,  0.00131404,  0.46489227,  0.54527295,  0.25133967,
         0.81015515,  0.07690489,  0.61856854,  0.57640159,  0.39638317],
       [ 0.67331553,  0.1272161 ,  0.38861239,  0.05945301,  0.2461921 ,
         0.67033434,  0.7146281 ,  0.14343286,  0.67792511,  0.91211033],
       [ 0.32639098,  0.9944272 ,  0.19800889,  0.38537633,  0.3426851 ,
         0.37097406,  0.05462193,  0.70528734,  0.93853819,  0.38358557],
       [ 0.19074202,  0.2794143 ,  0.90477228,  0.1620934 ,  0.92416155,
         0.08411896,  0.91636467,  0.23808205,  0.27141047,  0.23077166]], dtype=float32)

In [5]:
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random_uniform([4, 10]))

In [6]:
print(dataset1.output_shapes)  # shape of each component(tensor) in an element of the dataset - tensor is one element i.e  list of 10 floats
print(dataset1.output_types) #  this tells the type of element which is float32

(10,)
<dtype: 'float32'>


In [7]:
dataset2 = tf.data.Dataset.from_tensor_slices(
   (tf.random_uniform([4]),
    tf.random_uniform([4, 100], maxval=100, dtype=tf.int32)))
print(dataset2.output_types)  # ==> "(tf.float32, tf.int32)"
print(dataset2.output_shapes)  # ==> "((), (100,))"

(tf.float32, tf.int32)
(TensorShape([]), TensorShape([Dimension(100)]))


## 3rd example and analogy to Pandas DataFrame to understand easily

below example is just like joining a dataframe and a series to produce another dataframe

In [8]:
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
print(dataset3.output_types)  # ==> (tf.float32, (tf.float32, tf.int32))
print(dataset3.output_shapes)  # ==> "(10, ((), (100,)))"

(tf.float32, (tf.float32, tf.int32))
(TensorShape([Dimension(10)]), (TensorShape([]), TensorShape([Dimension(100)])))


# Iterator

### one shot
iterating once through a dataset

In [23]:
dataset = tf.data.Dataset.range(100)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

for i in range(100):
    value = sess.run(next_element)
    #print(value)
    assert i == value

### initializable
requires you to run an explicit iterator.initializer operation before using and enables parameterization

In [14]:
max_value = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(max_value)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

# Initialize an iterator over a dataset with 10 elements.
sess.run(iterator.initializer, feed_dict={max_value: 10})
for i in range(10):
    value = sess.run(next_element)
    assert i == value

# Initialize the same iterator over a dataset with 100 elements.
sess.run(iterator.initializer, feed_dict={max_value: 100})
for i in range(100):
    value = sess.run(next_element)
    #print(value)
    assert i == value

### reinitializable
can be initialized from multiple different Dataset objects with same shapes and types

In [44]:
# Define training and validation datasets with the same structure.
training_dataset = tf.data.Dataset.range(100).map(
    lambda x: x + tf.random_uniform([], -10, 10, tf.int64))
validation_dataset = tf.data.Dataset.range(50)

print(training_dataset)
print(validation_dataset)

<MapDataset shapes: (), types: tf.int64>
<RangeDataset shapes: (), types: tf.int64>


In [None]:
# A reinitializable iterator is defined by its structure. We could use the
# `output_types` and `output_shapes` properties of either `training_dataset`
# or `validation_dataset` here, because they are compatible.
iterator = Iterator.from_structure(training_dataset.output_types,
                                   training_dataset.output_shapes)
next_element = iterator.get_next()

training_init_op = iterator.make_initializer(training_dataset)
validation_init_op = iterator.make_initializer(validation_dataset)

# Run 20 epochs in which the training dataset is traversed, followed by the
# validation dataset.
for _ in range(20):
    # Initialize an iterator over the training dataset.
    sess.run(training_init_op)
    for _ in range(100):
        sess.run(next_element)

    # Initialize an iterator over the validation dataset.
    sess.run(validation_init_op)
    for _ in range(50):
        sess.run(next_element)


### feedable
used together with tf.placeholder to select what Iterator to use via the familiar feed_dict mechanism

In [30]:
# Define training and validation datasets with the same structure.
training_dataset = tf.data.Dataset.range(5).map(
    lambda x: x + tf.random_uniform([], -10, 10, tf.int64)).repeat(20)
validation_dataset = tf.data.Dataset.range(50)

# A feedable iterator is defined by a handle placeholder and its structure. We
# could use the `output_types` and `output_shapes` properties of either
# `training_dataset` or `validation_dataset` here, because they have
# identical structure.
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, training_dataset.output_types, training_dataset.output_shapes)
next_element = iterator.get_next()

# You can use feedable iterators with a variety of different kinds of iterator
# (such as one-shot and initializable iterators).
training_iterator = training_dataset.make_one_shot_iterator()
validation_iterator = validation_dataset.make_initializable_iterator()

# The `Iterator.string_handle()` method returns a tensor that can be evaluated
# and used to feed the `handle` placeholder.
training_handle = sess.run(training_iterator.string_handle())
validation_handle = sess.run(validation_iterator.string_handle())

# Loop forever, alternating between training and validation.
while True:
    # Run 200 steps using the training dataset. Note that the training dataset is
    # infinite, and we resume from where we left off in the previous `while` loop
    # iteration.
    for _ in range(5):
        print(_)
        sess.run(next_element, feed_dict={handle: training_handle})

    # Run one pass over the validation dataset.
    sess.run(validation_iterator.initializer)
    for _ in range(4):
        print("=====")
        print(_)
        sess.run(next_element, feed_dict={handle: validation_handle})

0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0
1
2
3
4
=====
0
=====
1
=====
2
=====
3
0


OutOfRangeError: End of sequence
	 [[Node: IteratorGetNext_15 = IteratorGetNext[output_shapes=[[]], output_types=[DT_INT64], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorFromStringHandle_3)]]

Caused by op 'IteratorGetNext_15', defined at:
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/IPython/core/interactiveshell.py", line 2683, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/IPython/core/interactiveshell.py", line 2787, in run_ast_nodes
    if self.run_code(code, result):
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/IPython/core/interactiveshell.py", line 2847, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-91d23a8c634b>", line 13, in <module>
    next_element = iterator.get_next()
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 259, in get_next
    name=name))
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 706, in iterator_get_next
    output_shapes=output_shapes, name=name)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

OutOfRangeError (see above for traceback): End of sequence
	 [[Node: IteratorGetNext_15 = IteratorGetNext[output_shapes=[[]], output_types=[DT_INT64], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorFromStringHandle_3)]]


In [48]:
dataset = tf.data.Dataset.range(5)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

# Typically `result` will be the output of a model, or an optimizer's
# training operation.
result = tf.add(next_element, next_element)

sess.run(iterator.initializer)
print(sess.run(result))  # ==> "0"
print(sess.run(result))  # ==> "2"
print(sess.run(result))  # ==> "4"
print(sess.run(result))  # ==> "6"
print(sess.run(result))  # ==> "8"
try:
    sess.run(result)
except tf.errors.OutOfRangeError:
    print("End of dataset")  # ==> "End of dataset"


0
2
4
6
8
End of dataset
