In [20]:
import tensorflow as tf

In [21]:
def get_next_mini_batch(batch_size, num_epochs, input_paths, record_defaults):
    """Build the ops that yield shuffled (features, labels) mini-batches from CSV files.

    Lines are read with tf.TextLineReader, so only text files are supported;
    other file types (e.g. binary files) cannot be read by this function.

    Args:
        batch_size: Number of rows per mini-batch. Also used to size the
            shuffle queue (min_after_dequeue = 3 * batch_size,
            capacity = min_after_dequeue + 10 * batch_size).
        num_epochs: Forwarded to tf.train.string_input_producer.
        input_paths: List of CSV file names to read from.
        record_defaults: Per-column default values handed to tf.decode_csv.
            The decoded row is unpacked into exactly eight columns.

    Returns:
        A pair (X_mini_batch, Y_mini_batch) taken from tf.train.shuffle_batch:
        features are columns 2-6 stacked together, labels are columns 7-8.
    """
    # FIFO queue that holds the file names until the reader asks for them.
    file_queue = tf.train.string_input_producer(
        input_paths, num_epochs=num_epochs, shuffle=True
    )

    # Line-oriented reader; skip_header_lines=1 skips the header row.
    line_reader = tf.TextLineReader(skip_header_lines=1)

    # read(...) hands back a single row per call; the key is not used here.
    _, csv_line = line_reader.read(file_queue)

    # decode_csv parses that row into one value per column (eight in total).
    # The first column is decoded but deliberately left out of the batch.
    (_first_col,
     feat_a, feat_b, feat_c, feat_d, feat_e,
     label_a, label_b) = tf.decode_csv(csv_line, record_defaults)

    # Columns 2-6 form the feature vector, columns 7-8 the label vector.
    features = tf.stack([feat_a, feat_b, feat_c, feat_d, feat_e])
    labels = tf.stack([label_a, label_b])

    # Shuffle-queue sizing derived from the batch size.
    shuffle_floor = 3 * batch_size
    queue_capacity = shuffle_floor + 10 * batch_size

    X_mini_batch, Y_mini_batch = tf.train.shuffle_batch(
        [features, labels],
        batch_size=batch_size,
        capacity=queue_capacity,
        min_after_dequeue=shuffle_floor,
    )
    return X_mini_batch, Y_mini_batch

In [22]:
# Start from an empty default graph before building the input pipeline.
tf.reset_default_graph()

# --- Pipeline configuration -------------------------------------------------
batch_size = 5
num_epochs = 2

# The data may be spread over several files; two are used here for illustration.
train_path1 = "train1.csv"
train_path2 = "train2.csv"
input_paths = [train_path1, train_path2]

# One default per CSV column, used when a value is missing:
# a string for the first column, floats for the remaining seven.
record_defaults = [[""]] + [[0.0] for _ in range(7)]

X_mini_batch, Y_mini_batch = get_next_mini_batch(
    batch_size, num_epochs, input_paths, record_defaults
)

# --- Run the pipeline until the inputs are exhausted ------------------------
with tf.Session() as sess:

    # Local variables must be initialised as well as global ones; this is
    # required to be able to set num_epochs.
    init_global_var = tf.global_variables_initializer()
    init_local_var = tf.local_variables_initializer()
    sess.run(init_global_var)
    sess.run(init_local_var)

    # The queue runners populate the filename queue. They have to be started
    # before any run/eval call that executes the read.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        while not coord.should_stop():
            X_mb, Y_mb = sess.run([X_mini_batch, Y_mini_batch])
            print(X_mb)
            print(Y_mb)
    except tf.errors.OutOfRangeError:
        # Treated as the normal end of the run (epoch limit reached).
        print('Done training, epoch reached')
    finally:
        # Always ask the reader threads to stop, whether or not an error occurred.
        coord.request_stop()

    # Wait for the queue-runner threads to finish before the session closes.
    coord.join(threads)

[[6. 3. 7. 8. 4.]
 [6. 6. 6. 6. 6.]
 [4. 4. 4. 4. 4.]
 [2. 2. 2. 2. 2.]
 [9. 8. 7. 4. 2.]]
[[0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]]
[[ 1.  5.  8.  3.  3.]
 [11. 11. 11. 11. 11.]
 [13. 13. 13. 13. 13.]
 [ 4.  6.  3.  4.  3.]
 [ 3.  3.  3.  3.  3.]]
[[0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]]
[[ 4.  6.  3.  4.  3.]
 [ 3.  5.  6.  9.  4.]
 [ 9.  9.  9.  9.  9.]
 [ 1.  5.  8.  3.  3.]
 [-1.  2.  3.  8.  5.]]
[[0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]]
[[ 9.  8.  7.  4.  2.]
 [ 3.  2.  8.  9.  1.]
 [-4.  2.  9.  4.  5.]
 [ 3.  2.  8.  9.  1.]
 [ 4.  7.  2.  3.  8.]]
[[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]]
[[ 1.  5.  4.  2.  7.]
 [ 5.  5.  5.  5.  5.]
 [ 1.  5.  4.  2.  7.]
 [-4.  2.  9.  4.  5.]
 [ 4.  4.  4.  4.  4.]]
[[0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]]
[[ 8.  8.  8.  8.  8.]
 [-1.  2.  3.  8.  5.]
 [ 2.  2.  2.  2.  2.]
 [10. 10. 10. 10. 10.]
 [ 4.  7.  2.  3.  8.]]
[[1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]]
[[ 7.  7.  7.  7.  7.]
 [ 3.  5.  6.  9.  4.]
 [ 8.  