Jupyter notebook for [this post](https://ischlag.github.io/2016/06/19/tensorflow-input-pipeline-example/)

In [1]:
# Dataset root, followed by the names of the two label CSVs.
# The root ends with "/" because the cells below build full paths by plain
# string concatenation (dataset_path + name).
dataset_path = "/home1/dataset/mnist/mnist/"
train_labels_file = "train-labels.csv"
test_labels_file = "test-labels.csv"

In [2]:
def encode_label(label):
    """Turn one raw label field into its integer class id.

    ``int()`` ignores surrounding whitespace, so a field that still carries
    the trailing newline of its line is accepted unchanged.
    """
    class_id = int(label)
    return class_id

def read_label_file(file):
    """Parse a ``filepath,label`` CSV file into two parallel lists.

    Args:
        file: Path of the label file. Every non-blank line must consist of
            exactly two comma-separated fields: an image path and its label.

    Returns:
        A ``(filepaths, labels)`` tuple. ``filepaths`` holds the first field
        of each line unchanged; ``labels`` holds the second field after it
        has been passed through ``encode_label``.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields.
    """
    filepaths = []
    labels = []
    # The context manager closes the handle even when a malformed line raises;
    # the previous version never closed the file at all.
    with open(file, "r") as f:
        for line in f:
            # Tolerate blank lines (e.g. an extra newline at the end of the
            # file) instead of failing on the tuple unpacking below.
            if not line.strip():
                continue
            filepath, label = line.split(",")
            filepaths.append(filepath)
            labels.append(encode_label(label))
    return filepaths, labels

# Read both label CSVs from the dataset root. Each call returns two parallel
# lists: the image paths exactly as written in the file (still relative at
# this point) and their labels as integers.
train_filepaths, train_labels = read_label_file(dataset_path + train_labels_file)
test_filepaths, test_labels = read_label_file(dataset_path + test_labels_file)

In [3]:
# Transform the relative paths from the label files into full paths.
# The startswith() guard keeps this cell idempotent: running it a second time
# must not prepend dataset_path again to paths that already carry it.
train_filepaths = [fp if fp.startswith(dataset_path) else dataset_path + fp
                   for fp in train_filepaths]
test_filepaths = [fp if fp.startswith(dataset_path) else dataset_path + fp
                  for fp in test_filepaths]

# For this example we will create our own test partition, so pool both splits.
all_filepaths = train_filepaths + test_filepaths
all_labels = train_labels + test_labels

# We limit the number of files to 20 to keep the printed output readable.
all_filepaths = all_filepaths[:20]
all_labels = all_labels[:20]

In [4]:
# Show the 20 selected paths and labels. The parenthesised single-argument
# form prints exactly the same text under Python 2 and is also valid Python 3.
print(all_filepaths)
print(all_labels)

['/home1/dataset/mnist/mnist/train-images/0.jpg', '/home1/dataset/mnist/mnist/train-images/1.jpg', '/home1/dataset/mnist/mnist/train-images/2.jpg', '/home1/dataset/mnist/mnist/train-images/3.jpg', '/home1/dataset/mnist/mnist/train-images/4.jpg', '/home1/dataset/mnist/mnist/train-images/5.jpg', '/home1/dataset/mnist/mnist/train-images/6.jpg', '/home1/dataset/mnist/mnist/train-images/7.jpg', '/home1/dataset/mnist/mnist/train-images/8.jpg', '/home1/dataset/mnist/mnist/train-images/9.jpg', '/home1/dataset/mnist/mnist/train-images/10.jpg', '/home1/dataset/mnist/mnist/train-images/11.jpg', '/home1/dataset/mnist/mnist/train-images/12.jpg', '/home1/dataset/mnist/mnist/train-images/13.jpg', '/home1/dataset/mnist/mnist/train-images/14.jpg', '/home1/dataset/mnist/mnist/train-images/15.jpg', '/home1/dataset/mnist/mnist/train-images/16.jpg', '/home1/dataset/mnist/mnist/train-images/17.jpg', '/home1/dataset/mnist/mnist/train-images/18.jpg', '/home1/dataset/mnist/mnist/train-images/19.jpg']
[5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9]

In [5]:
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes

# Convert the Python lists into tensors: the file paths become a string
# tensor and the labels a 32-bit integer tensor.
# NOTE: all_images holds file *paths*, not pixel data, and all_labels is
# rebound here from a Python list to a tensor.
all_images = ops.convert_to_tensor(all_filepaths, dtype=dtypes.string)
all_labels = ops.convert_to_tensor(all_labels, dtype=dtypes.int32)

In [6]:
# Printing a tensor shows its graph metadata (op name, shape, dtype), not its
# values. The parenthesised form works under both Python 2 and Python 3.
print(all_images)
print(all_labels)

Tensor("Const:0", shape=(20,), dtype=string)
Tensor("Const_1:0", shape=(20,), dtype=int32)


In [7]:
# create a partition vector
import random
import tensorflow as tf
test_set_size = 5
# One entry per file: 1 routes the sample to the test set, 0 to the train set.
# The leading test_set_size samples form the test set; the split stays
# deterministic because the shuffle below is left disabled.
partitions = [1] * test_set_size + [0] * (len(all_filepaths) - test_set_size)
# random.shuffle(partitions)

# Split paths and labels with the same vector so that they stay aligned.
# Output 0 of each call is the train part, output 1 the test part.
# NOTE: train_labels / test_labels are rebound here from lists to tensors.
train_images, test_images = tf.dynamic_partition(
    data=all_images, partitions=partitions, num_partitions=2)
train_labels, test_labels = tf.dynamic_partition(
    data=all_labels, partitions=partitions, num_partitions=2)

In [8]:
# Inspect the train partition tensor. The parenthesised form prints the same
# text under Python 2 and is also valid Python 3.
print(train_images)

Tensor("DynamicPartition:0", shape=(?,), dtype=string)


In [9]:
# create input queues
# NUM_CHANNELS is passed to decode_jpeg below and is reused in the next cell
# as the last dimension of the image shape.
NUM_CHANNELS=1
# Each producer is given the path tensor and the label tensor together, so
# element 0 of the returned queue is an image path and element 1 its label.
# shuffle=False asks the producer not to shuffle the samples.
train_input_queue = tf.train.slice_input_producer(
                                    [train_images, train_labels],
                                    shuffle=False)
test_input_queue = tf.train.slice_input_producer(
                                    [test_images, test_labels],
                                    shuffle=False)

# process path and string tensor into an image and a label
# Train branch: read the file named by the path, then decode it as JPEG.
file_content = tf.read_file(train_input_queue[0])
train_image = tf.image.decode_jpeg(file_content, channels=NUM_CHANNELS)
train_label = train_input_queue[1]

# Test branch: same steps. file_content is deliberately reused as a scratch
# name; train_image already captured the previous value.
file_content = tf.read_file(test_input_queue[0])
test_image = tf.image.decode_jpeg(file_content, channels=NUM_CHANNELS)
test_label = test_input_queue[1]

In [10]:
# define tensor shape
IMAGE_HEIGHT=28
IMAGE_WIDTH=28
BATCH_SIZE=5
# Declare the static shape (height, width, channels) of the decoded images by
# hand -- presumably required by tf.train.batch below; TODO confirm against
# the TensorFlow documentation.
train_image.set_shape([IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])
test_image.set_shape([IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])


# collect batches of images before processing
# Each call groups single (image, label) samples into batches of BATCH_SIZE
# and returns the batched image tensor together with the batched label tensor.
train_image_batch, train_label_batch = tf.train.batch(
                                    [train_image, train_label],
                                    batch_size=BATCH_SIZE
                                    #,num_threads=1
                                    )
test_image_batch, test_label_batch = tf.train.batch(
                                    [test_image, test_label],
                                    batch_size=BATCH_SIZE
                                    #,num_threads=1
                                    )

In [11]:
with tf.Session() as sess:

    # Initialize the variables. tf.initialize_all_variables() is deprecated;
    # tf.global_variables_initializer() is its documented replacement.
    sess.run(tf.global_variables_initializer())
    # Start the queue-runner threads so they begin to shovel data.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # Every sess.run() pulls the next batch of BATCH_SIZE labels. The
        # producers are unshuffled, so the batches repeat in a fixed cycle.
        print("from the train set:")
        for i in range(20):
            print(sess.run(train_label_batch))

        print("from the test set:")
        for i in range(10):
            print(sess.run(test_label_batch))
    finally:
        # Stop and join the queue threads even if a run() call above fails,
        # so no background thread outlives the session.
        coord.request_stop()
        coord.join(threads)
    # No explicit sess.close(): leaving the with-block closes the session.

Instructions for updating:
Use `tf.global_variables_initializer` instead.
from the train set:
[2 1 3 1 4]
[3 5 3 6 1]
[7 2 8 6 9]
[2 1 3 1 4]
[3 5 3 6 1]
[7 2 8 6 9]
[2 1 3 1 4]
[3 5 3 6 1]
[7 2 8 6 9]
[2 1 3 1 4]
[3 5 3 6 1]
[7 2 8 6 9]
[2 1 3 1 4]
[3 5 3 6 1]
[7 2 8 6 9]
[2 1 3 1 4]
[3 5 3 6 1]
[7 2 8 6 9]
[2 1 3 1 4]
[3 5 3 6 1]
from the test set:
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
[5 0 4 1 9]
