In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# ONE-SHOT ITERATOR, consumed with a fixed-length `for` loop.
# The dataset yields the integers 0..9. No initializer op is run here: the
# one-shot iterator is ready as soon as it has been created.

dataset = tf.data.Dataset.range(10)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    for expected in range(10):
        # Every run of `next_element` pulls the next value from the iterator.
        value = sess.run(next_element)
        print(value)
        # The dataset is an ordered range, so the k-th value must equal k.
        assert value == expected

0
1
2
3
4
5
6
7
8
9


In [3]:
# ONE-SHOT ITERATOR with a `for` loop.
# NOTE(review): this cell rebuilds exactly the same pipeline as the cell
# before it (range(10) -> one-shot iterator -> get_next) and rebinds the same
# names; it is kept because its output is part of the notebook.

dataset = tf.data.Dataset.range(10)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    for step in range(10):
        value = sess.run(next_element)
        print(value)
        # Values arrive in order, so the step number and the value coincide.
        assert step == value

0
1
2
3
4
5
6
7
8
9


In [4]:
# ONE-SHOT ITERATOR, consumed with an open-ended `while` loop.
# Instead of hard-coding the number of elements, keep pulling values until
# TensorFlow signals exhaustion by raising `tf.errors.OutOfRangeError`.

dataset1 = tf.data.Dataset.range(5)
iterator1 = dataset1.make_one_shot_iterator()
next_element1 = iterator1.get_next()

with tf.Session() as sess:
    try:
        while True:
            value = sess.run(next_element1)
            print(value)
    except tf.errors.OutOfRangeError:
        # Reached once all 5 elements have been consumed; this ends the loop.
        print("Out of range error triggered")


0
1
2
3
4
Out of range error triggered


In [5]:
# INITIALIZABLE ITERATOR
# This iterator is started explicitly by running `iterator2.initializer`, and
# the dataset size is supplied at that moment through the `max_value`
# placeholder. Running the initializer again restarts the same iterator, here
# over a dataset of a different size.
# Author's motivating use case: with structured data, periodically (say after
# each 50th epoch) computing cost & accuracy on the dev set during training.

max_value = tf.placeholder(tf.int64, shape=[])
dataset2 = tf.data.Dataset.range(max_value)
iterator2 = dataset2.make_initializable_iterator()
next_element2 = iterator2.get_next()

with tf.Session() as sess:
    # First pass: max_value=6, so the iterator yields 0..5.
    sess.run(iterator2.initializer, feed_dict={max_value: 6})
    try:
        while True:
            value = sess.run(next_element2)
            print(value)
    except tf.errors.OutOfRangeError:
        # Raised when the iterator has no more elements; ends the first pass.
        print("Out of range error triggered (1)")

    print("\nDone with the first iterator\n")

    # Second pass: re-initialize the very same iterator with max_value=3,
    # so it now yields 0..2.
    sess.run(iterator2.initializer, feed_dict={max_value: 3})
    try:
        while True:
            value = sess.run(next_element2)
            print(value)
    except tf.errors.OutOfRangeError:
        print("Out of range error triggered (2)")

0
1
2
3
4
5
Out of range error triggered (1)

Done with the first iterator

0
1
2
Out of range error triggered (2)


In [6]:
# RE-INITIALIZABLE ITERATOR
#
# One iterator, several datasets: the iterator is created from a structure
# (types + shapes) and can then be pointed at any dataset with a compatible
# structure by running the matching initializer op.
#
# Two datasets with the same structure are defined below:
#   * validation_dataset yields 0, 1, 2 unchanged.
#   * training_dataset yields x + noise for x in 0..5, where noise is a random
#     int64 drawn by tf.random_uniform with minval=-4 and maxval=4. The upper
#     bound is exclusive, so noise is in -4..3: for x=0 the result lies in
#     -4..3, for x=1 in -3..4, and so on.
# This mirrors a common setup: a training input pipeline that randomly
# perturbs its inputs (e.g. image augmentation to improve generalization) and
# a validation pipeline that evaluates on unmodified data. Such pipelines
# typically use different Dataset objects with the same structure (same types
# and compatible shapes for each component).
training_dataset = tf.data.Dataset.range(6).map(
    lambda x: x + tf.random_uniform(shape=[], minval=-4, maxval=4, dtype=tf.int64))
validation_dataset = tf.data.Dataset.range(3)

# The structure could be taken from either dataset, because they are
# compatible; the training dataset's properties are used here.
iterator3 = tf.data.Iterator.from_structure(
    output_types=training_dataset.output_types,
    output_shapes=training_dataset.output_shapes)
next_element3 = iterator3.get_next()

# One initializer op per dataset; running one (re)binds the iterator to it.
training_init_op = iterator3.make_initializer(training_dataset)
validation_init_op = iterator3.make_initializer(validation_dataset)

with tf.Session() as sess:
    # Run 2 epochs; each epoch traverses the training dataset and then the
    # validation dataset.
    numEpochs = 2
    # (initializer op, message printed when that dataset is exhausted)
    phases = (
        (training_init_op, "Out of range error triggered (train set)"),
        (validation_init_op, "Out of range error triggered (validation set)"),
    )
    for _ in range(numEpochs):
        for init_op, exhausted_message in phases:
            # Point the shared iterator at the start of this phase's dataset.
            sess.run(init_op)
            try:
                while True:
                    value = sess.run(next_element3)
                    print(value)
            except tf.errors.OutOfRangeError:
                print(exhausted_message)

-4
1
2
5
4
4
Out of range error triggered (train set)
0
1
2
Out of range error triggered (validation set)
-3
-2
-2
-1
3
2
Out of range error triggered (train set)
0
1
2
Out of range error triggered (validation set)


In [7]:
# FEEDABLE ITERATOR
# A string-handle placeholder selects, on every `sess.run` call, which
# concrete iterator feeds `next_element4`. This gives the same flexibility as
# a re-initializable iterator, but switching between iterators does not
# restart them from the beginning of their dataset.
#
# That is particularly useful when the training set is very large: you can
# occasionally compute cost/accuracy on another dataset without finishing the
# epoch, and then continue training from the point where you left off.

# Training data: the values 1..5, repeated indefinitely.
training_dataset4 = tf.data.Dataset.range(5)
training_dataset4 = training_dataset4.map(lambda x: x + 1)
training_dataset4 = training_dataset4.repeat()
# Validation data: the values 0 and 1.
validation_dataset4 = tf.data.Dataset.range(2)

# A feedable iterator is defined by a handle placeholder plus a structure.
# Either dataset's `output_types` / `output_shapes` would do, because both
# datasets have identical structure.
handle = tf.placeholder(tf.string, shape=[])
iterator4 = tf.data.Iterator.from_string_handle(
    string_handle=handle,
    output_types=training_dataset4.output_types,
    output_shapes=training_dataset4.output_shapes)
next_element4 = iterator4.get_next()

# The concrete iterators behind the handle may be of different kinds:
# one-shot for training, initializable for validation.
training_iterator4 = training_dataset4.make_one_shot_iterator()
validation_iterator4 = validation_dataset4.make_initializable_iterator()

with tf.Session() as sess:
    # `string_handle()` returns a tensor; evaluating it gives the value that
    # is fed into the `handle` placeholder.
    training_handle = sess.run(training_iterator4.string_handle())
    validation_handle = sess.run(validation_iterator4.string_handle())

    training_feed = {handle: training_handle}
    validation_feed = {handle: validation_handle}

    # This alternation could go on forever; it is limited to 3 rounds here.
    for _round in range(3):
        # 7 training steps. The dataset only holds 5 distinct values, but
        # repeat() makes it endless, and each round resumes where the
        # previous round stopped.
        for _step in range(7):
            print(sess.run(next_element4, feed_dict=training_feed))

        print("\nDone with training data\n")

        # One full pass over the validation dataset: restart its iterator,
        # then read exactly its 2 elements (range(2) above).
        sess.run(validation_iterator4.initializer)
        for _step in range(2):
            print(sess.run(next_element4, feed_dict=validation_feed))

        print("\nDone with validation data\n")


1
2
3
4
5
1
2

Done with training data

0
1

Done with validation data

3
4
5
1
2
3
4

Done with training data

0
1

Done with validation data

5
1
2
3
4
5
1

Done with training data

0
1

Done with validation data



In [40]:
# READING DATA FROM train and validation (dev set) CSV FILES with an
# INITIALIZABLE ITERATOR -- single label column.
#
# Every CSV file is assumed to share this 7-column layout (decode_csv must be
# updated if it does not hold):
#   column 0     : example ID or similar -- NOT a feature
#   columns 1..5 : the five feature columns
#   column 6     : the label; exactly ONE column represents the label
#                  (for more than one label column, see the next example)

# Names of the feature columns (not referenced by the code shown in this cell).
feature_names = [
    'f1',
    'f2',
    'f3',
    'f4',
    'f5',
]

# One default per CSV column; the default's type also fixes the column type.
record_defaults = [
    [""],   # column 0: example ID (string)
    [0.0],  # column 1: f1
    [0.0],  # column 2: f2
    [0.0],  # column 3: f3
    [0.0],  # column 4: f4
    [0.0],  # column 5: f5
    [0.0],  # column 6: label
]


def decode_csv(line):
    """Turn one CSV line into a ``(features, label)`` pair.

    The line is split into columns by ``tf.decode_csv`` using the module-level
    ``record_defaults``. The first column is discarded (it is assumed not to
    be a feature), the last column is the label, and every column in between
    is stacked into a single feature tensor so that later computations can be
    vectorized.

    Args:
        line: one CSV record, as handed over by ``Dataset.map``.

    Returns:
        Tuple ``(features, label)``: the stacked middle columns and the
        (unstacked) last column.
    """
    columns = tf.decode_csv(line, record_defaults)
    label = columns[-1]
    # Everything between the ID column and the label column is a feature.
    features = tf.stack(columns[1:-1])
    return features, label
   
# Input pipeline:
#   file names -> text lines of each file (first line skipped, presumably a
#   header row) -> (features, label) via decode_csv -> shuffle -> batches of 7.
# The file list is a placeholder, so the same pipeline serves both the
# training files and the dev files, selected when the iterator is initialized.
filenames = tf.placeholder(tf.string, shape=[None])
dataset5 = (
    tf.data.Dataset.from_tensor_slices(filenames)
    .flat_map(lambda filename: tf.data.TextLineDataset(filename).skip(1).map(decode_csv))
    .shuffle(buffer_size=1000)
    .batch(7)
)
iterator5 = dataset5.make_initializable_iterator()
next_element5 = iterator5.get_next()

# Files fed to `filenames` for the training passes.
training_filenames = ["train_data1.csv", "train_data2.csv"]

# Files fed to `filenames` for the dev-set pass.
validation_filenames = ["dev_data1.csv"]

with tf.Session() as sess:
    # Two epochs over the training files; each epoch re-initializes the
    # iterator so it starts again from the first batch.
    for _ in range(2):
        sess.run(iterator5.initializer, feed_dict={filenames: training_filenames})
        try:
            while True:
                features, labels = sess.run(next_element5)
                # A training step would go here.
                print("(train) features: ")
                print(features)
                print("(train) labels: ")
                print(labels)
        except tf.errors.OutOfRangeError:
            # Epoch finished: every training batch has been consumed.
            print("Out of range error triggered (looped through training set 1 time)")

    print("\nDone with the first iterator\n")

    # A single pass over the dev files with the same iterator.
    sess.run(iterator5.initializer, feed_dict={filenames: validation_filenames})
    try:
        while True:
            features, labels = sess.run(next_element5)
            # Cost / accuracy on the dev set would be computed here.
            print("(dev) features: ")
            print(features)
            print("(dev) labels: ")
            print(labels)
    except tf.errors.OutOfRangeError:
        print("Out of range error triggered (looped through dev set 1 time only)")

(train) features: 
[[17. 17. 17. 17. 17.]
 [27. 27. 27. 27. 27.]
 [ 5.  5.  5.  5.  5.]
 [22. 22. 22. 22. 22.]
 [13. 13. 13. 13. 13.]
 [25. 25. 25. 25. 25.]
 [18. 18. 18. 18. 18.]]
(train) labels: 
[33. 53.  9. 43. 25. 49. 35.]
(train) features: 
[[ 7.  7.  7.  7.  7.]
 [19. 19. 19. 19. 19.]
 [15. 15. 15. 15. 15.]
 [14. 14. 14. 14. 14.]
 [28. 28. 28. 28. 28.]
 [26. 26. 26. 26. 26.]
 [23. 23. 23. 23. 23.]]
(train) labels: 
[13. 37. 29. 27. 55. 51. 45.]
(train) features: 
[[ 1.  1.  1.  1.  1.]
 [12. 12. 12. 12. 12.]
 [ 6.  6.  6.  6.  6.]
 [ 3.  3.  3.  3.  3.]
 [ 2.  2.  2.  2.  2.]
 [ 8.  8.  8.  8.  8.]
 [16. 16. 16. 16. 16.]]
(train) labels: 
[ 1. 23. 11.  5.  3. 15. 31.]
(train) features: 
[[ 4.  4.  4.  4.  4.]
 [20. 20. 20. 20. 20.]
 [21. 21. 21. 21. 21.]
 [10. 10. 10. 10. 10.]
 [24. 24. 24. 24. 24.]
 [ 9.  9.  9.  9.  9.]
 [11. 11. 11. 11. 11.]]
(train) labels: 
[ 7. 39. 41. 19. 47. 17. 21.]
Out of range error triggered (looped through training set 1 time)
(train) features: 
[[2

In [43]:
# READING DATA FROM train and validation (dev set) CSV FILES with an
# INITIALIZABLE ITERATOR -- two label columns.
#
# Every CSV file is assumed to share this 7-column layout (decode_csv2 must be
# updated if it does not hold, e.g. for 3 or 4 label columns):
#   column 0     : example ID or similar -- NOT a feature
#   columns 1..4 : the four feature columns
#   columns 5..6 : the two label columns

# Names of the feature columns. Not used below; just for illustration.
feature_names = [
    'f1',
    'f2',
    'f3',
    'f4',
]

# One default per CSV column; the default's type also fixes the column type.
record_defaults = [
    [""],   # column 0: example ID (string)
    [0.0],  # column 1: f1
    [0.0],  # column 2: f2
    [0.0],  # column 3: f3
    [0.0],  # column 4: f4
    [0.0],  # column 5: first label column
    [0.0],  # column 6: second label column
]


def decode_csv2(line):
    """Turn one CSV line into a ``(features, label)`` pair with a 2-column label.

    The line is split into columns by ``tf.decode_csv`` using the module-level
    ``record_defaults``. The first column is discarded (it is assumed not to
    be a feature), the last two columns form the label, and every column in
    between is a feature.

    Args:
        line: one CSV record, as handed over by ``Dataset.map``.

    Returns:
        Tuple ``(features, label)``: the middle columns stacked into one
        tensor, and the last two columns stacked into one tensor.
    """
    parsed_line = tf.decode_csv(line, record_defaults)
    # Select by slicing instead of deleting entries one at a time: after
    # removing the last entry, index -2 no longer refers to the first label
    # column but to the last feature column, so successive `del`s would drop
    # a feature and leave a label column among the features.
    features = tf.stack(parsed_line[1:-2])
    # Stacking merges the two label columns into a single tensor, so each
    # example has one label vector (and each batch one label matrix).
    label = tf.stack(parsed_line[-2:])
    return features, label
   
# Input pipeline:
#   file names -> text lines of each file (first line skipped, presumably a
#   header row) -> (features, label) via decode_csv2 -> shuffle -> batches of 7.
# The file list is a placeholder, so the same pipeline serves both the
# training files and the dev files, selected when the iterator is initialized.
filenames6 = tf.placeholder(tf.string, shape=[None])
dataset6 = (
    tf.data.Dataset.from_tensor_slices(filenames6)
    .flat_map(lambda filename: tf.data.TextLineDataset(filename).skip(1).map(decode_csv2))
    .shuffle(buffer_size=1000)
    .batch(7)
)
iterator6 = dataset6.make_initializable_iterator()
next_element6 = iterator6.get_next()

# Files fed to `filenames6` for the training passes.
training_filenames = ["train_data1.csv", "train_data2.csv"]

# Files fed to `filenames6` for the dev-set pass.
validation_filenames = ["dev_data1.csv"]

with tf.Session() as sess:
    # Two epochs over the training files; each epoch re-initializes the
    # iterator so it starts again from the first batch.
    for _ in range(2):
        sess.run(iterator6.initializer, feed_dict={filenames6: training_filenames})
        while True:
            try:
                features, labels = sess.run(next_element6)
            except tf.errors.OutOfRangeError:
                # Epoch finished: every training batch has been consumed.
                print("Out of range error triggered (looped through training set 1 time)")
                break
            # A training step would go here.
            print("(train) features: ")
            print(features)
            print("(train) labels: ")
            print(labels)

    print("\nDone with the first iterator\n")

    # A single pass over the dev files with the same iterator.
    sess.run(iterator6.initializer, feed_dict={filenames6: validation_filenames})
    while True:
        try:
            features, labels = sess.run(next_element6)
        except tf.errors.OutOfRangeError:
            print("Out of range error triggered (looped through dev set 1 time only)")
            break
        # Cost / accuracy on the dev set would be computed here.
        print("(dev) features: ")
        print(features)
        print("(dev) labels: ")
        print(labels)

(train) features: 
[[ 9.  9.  9.  9.]
 [19. 19. 19. 19.]
 [15. 15. 15. 15.]
 [21. 21. 21. 21.]
 [ 6.  6.  6.  6.]
 [27. 27. 27. 27.]
 [12. 12. 12. 12.]]
(train) labels: 
[[ 9. 17.]
 [19. 37.]
 [15. 29.]
 [21. 41.]
 [ 6. 11.]
 [27. 53.]
 [12. 23.]]
(train) features: 
[[10. 10. 10. 10.]
 [ 3.  3.  3.  3.]
 [20. 20. 20. 20.]
 [ 4.  4.  4.  4.]
 [18. 18. 18. 18.]
 [23. 23. 23. 23.]
 [ 2.  2.  2.  2.]]
(train) labels: 
[[10. 19.]
 [ 3.  5.]
 [20. 39.]
 [ 4.  7.]
 [18. 35.]
 [23. 45.]
 [ 2.  3.]]
(train) features: 
[[11. 11. 11. 11.]
 [ 5.  5.  5.  5.]
 [28. 28. 28. 28.]
 [16. 16. 16. 16.]
 [14. 14. 14. 14.]
 [24. 24. 24. 24.]
 [ 7.  7.  7.  7.]]
(train) labels: 
[[11. 21.]
 [ 5.  9.]
 [28. 55.]
 [16. 31.]
 [14. 27.]
 [24. 47.]
 [ 7. 13.]]
(train) features: 
[[ 8.  8.  8.  8.]
 [26. 26. 26. 26.]
 [17. 17. 17. 17.]
 [22. 22. 22. 22.]
 [ 1.  1.  1.  1.]
 [25. 25. 25. 25.]
 [13. 13. 13. 13.]]
(train) labels: 
[[ 8. 15.]
 [26. 51.]
 [17. 33.]
 [22. 43.]
 [ 1.  1.]
 [25. 49.]
 [13. 25.]]
Out of r