Cat Dog Classification
===

In [1]:
import tensorflow as tf
import os
import re
import numpy as np
import zipfile
import matplotlib.pyplot as plt

tf.logging.set_verbosity(tf.logging.INFO)

## Utility function
Image reader, Input pipeline, etc.

In [52]:
CAT = 0
DOG = 1

cwd = os.getcwd()

def prepare_file():
  file_list = ['train', 'test']
  valid = True

  for i in range(len(file_list)):
    filename = file_list[i] + '.zip'
    dest_filename = os.path.join(cwd, 'data', filename)

    if not os.path.exists(dest_filename):
      print('Please download ' + filename + ' and put on src/data folder')
      url = "https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/"
      print(url + filename)
      valid = False
      continue
    
    images_path = os.path.join(cwd, 'data', filename)

    zip = zipfile.ZipFile(dest_filename)
    if not os.path.exists(images_path):
        print('Extracting...')
        zip.extractall(os.path.join(cwd, 'data'))
      
  return valid

def read_image_label_list(folder_dir):
    dir_list = os.listdir(os.path.join(cwd, folder_dir))
    
    filenames = []
    labels = []
    
    for i, d in enumerate(dir_list):
        if re.search("train", folder_dir):
            if re.search("cat", d):
                labels.append(CAT)
            else:
                labels.append(DOG)
        else:
            labels.append(-1)
        filenames.append(os.path.join(cwd, folder_dir, d))
    
    return filenames, labels

def read_images_from_disk(input_queue):
    filename = input_queue[0]
    label = input_queue[1]
    
    file_contents = tf.read_file(filename)
    image = tf.image.decode_image(file_contents, channels=3)
    image.set_shape([None, None, 3])
    
    return image, label

def gen_input_fn(image_list, label_list, batch_size, shuffle):
    
    def input_fn():
        images = tf.convert_to_tensor(image_list, dtype=tf.string)
        labels = tf.convert_to_tensor(label_list, dtype=tf.int32)

        input_queue = tf.train.slice_input_producer(
            [images, labels],
            capacity=batch_size * 5,
            shuffle=shuffle,
            name="file_input_queue"
        )

        image, label = read_images_from_disk(input_queue)

        image = tf.image.resize_images(image, (256, 256), tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        image_batch, label_batch = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=1,
            name="batch_queue",
            capacity=batch_size * 10,
            allow_smaller_final_batch=False
        )
        
        return tf.identity(image_batch, name="features"), tf.identity(label_batch, name="label")

    return input_fn

def train_valid_input_fn(data_dir, batch_size, shuffle):
    img, labels = read_image_label_list(data_dir)
    img = np.array(img)
    labels = np.array(labels)
    data_size = img.shape[0]

    print("Data size: " + str(data_size))
    split = int(0.7 * data_size)

    random_seq = np.random.permutation(data_size)

    img = img[random_seq]
    labels = labels[random_seq]

    return (
        gen_input_fn(img[0:split], labels[0:split], batch_size, shuffle),
        gen_input_fn(img[split:], labels[split:], batch_size, shuffle)
    )

def test_input_fn(data_dir, batch_size, shuffle):
    image_list, label_list = read_image_label_list(data_dir)
    return gen_input_fn(image_list, label_list, batch_size, shuffle)

### Preview Data
Check correctness of data

In [4]:
if prepare_file():
    print "Files are ready \o/"

Files are ready \o/


In [53]:
def plot_img(data, label=None):
    plt.figure()
    plt.imshow(data)
    if label is not None:
        plt.title(label)

def preview_img():

    img_preview = tf.Graph()
    with img_preview.as_default():
        tensor_train, _ = train_valid_input_fn('data/train', batch_size=10, shuffle=False)
        img, _ = tensor_train()
        

    with tf.Session(graph=img_preview) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        print(img.eval())
        print('###')

        coord.request_stop()
        coord.join(threads)

        sess.close()

preview_img()

Data size: 25000
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.NotFoundError'>, dog.814.jpg
	 [[Node: ReadFile = ReadFile[_device="/job:localhost/replica:0/task:0/cpu:0"](file_input_queue/Gather)]]


OutOfRangeError: FIFOQueue '_51_batch_queue/fifo_queue' is closed and has insufficient elements (requested 10, current size 0)
	 [[Node: batch_queue = QueueDequeueManyV2[component_types=[DT_UINT8, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch_queue/fifo_queue, batch_queue/n)]]

Caused by op u'batch_queue', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-53-e38e74802027>", line 29, in <module>
    preview_img()
  File "<ipython-input-53-e38e74802027>", line 12, in preview_img
    img, _ = tensor_train()
  File "<ipython-input-52-8729b0626ded>", line 82, in input_fn
    allow_smaller_final_batch=False
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 919, in batch
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 716, in _batch
    dequeued = queue.dequeue_many(batch_size, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 457, in dequeue_many
    self._queue_ref, n=n, component_types=self._dtypes, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 946, in _queue_dequeue_many_v2
    timeout_ms=timeout_ms, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

OutOfRangeError (see above for traceback): FIFOQueue '_51_batch_queue/fifo_queue' is closed and has insufficient elements (requested 10, current size 0)
	 [[Node: batch_queue = QueueDequeueManyV2[component_types=[DT_UINT8, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch_queue/fifo_queue, batch_queue/n)]]
