In [1]:
import os
import sys
from six.moves import urllib
import tarfile
import tensorflow as tf
import numpy as np
import time
import math

# define weight function

In [2]:
def variable_with_weight_loss(shape, stddev, wl):
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if wl is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var

# download function

In [3]:
def maybe_download_and_extract(data_dir, data_url):
  dest_directory = data_dir
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = data_url.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
          float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(data_url, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
  if not os.path.exists(extracted_dir_path):
    tarfile.open(filepath, 'r:gz').extractall(dest_directory)

# load data

In [4]:
data_dir = '/tmp/cifar10_data/cifar-10-batches-bin'
data_url = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
maybe_download_and_extract(data_dir, data_url)

# Reads and parses examples from data files

In [5]:
def read_cifar10(filename_queue):
  class CIFAR10Record(object):
    pass
  result = CIFAR10Record()

  # Dimensions of the images in the CIFAR-10 dataset.
  label_bytes = 1  # 2 for CIFAR-100
  result.height = 32
  result.width = 32
  result.depth = 3
  image_bytes = result.height * result.width * result.depth
  # Every record consists of a label followed by the image, with a
  # fixed number of bytes for each.
  record_bytes = label_bytes + image_bytes

  # Read a record, getting filenames from the filename_queue.  No
  # header or footer in the CIFAR-10 format, so we leave header_bytes
  # and footer_bytes at their default of 0.
  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  result.key, value = reader.read(filename_queue)

  # Convert from a string to a vector of uint8 that is record_bytes long.
  record_bytes = tf.decode_raw(value, tf.uint8)

  # The first bytes represent the label, which we convert from uint8->int32.
  result.label = tf.cast(
      tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

  # The remaining bytes after the label represent the image, which we reshape
  # from [depth * height * width] to [depth, height, width].
  depth_major = tf.reshape(
      tf.strided_slice(record_bytes, [label_bytes],
                       [label_bytes + image_bytes]),
      [result.depth, result.height, result.width])
  # Convert from [depth, height, width] to [height, width, depth].
  result.uint8image = tf.transpose(depth_major, [1, 2, 0])

  return result

# Construct a queued batch of images and labels

In [6]:
def _generate_image_and_label_batch(image, label, min_queue_examples,
                                     batch_size, shuffle):
  # Create a queue that shuffles the examples, and then
  # read 'batch_size' images + labels from the example queue.
  num_preprocess_threads = 16
  if shuffle:
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)
  else:
    images, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size)

  # Display the training images in the visualizer.
  tf.summary.image('images', images)

  return images, tf.reshape(label_batch, [batch_size])

# distorted input function

In [7]:
def distorted_inputs(data_dir, batch_size, image_size, num_examples_per_epoch_for_train):
  filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
               for i in xrange(1, 6)]
  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = image_size
  width = image_size

  # Image processing for training the network. Note the many random
  # distortions applied to the image.

  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order their operation.
  # NOTE: since per_image_standardization zeros the mean and makes
  # the stddev unit, this likely has no effect see tensorflow#1458.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(distorted_image)

  # Set the shapes of tensors.
  float_image.set_shape([height, width, 3])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(num_examples_per_epoch_for_train *
                           min_fraction_of_examples_in_queue)
  print ('Filling queue with %d CIFAR images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)

# Construct input for evaluation using the Reader ops

In [8]:
def inputs(eval_data, data_dir, batch_size, image_size,  
           num_examples_per_epoch_for_train, num_examples_per_epoch_for_eval):
  if not eval_data:
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                 for i in xrange(1, 6)]
    num_examples_per_epoch = num_examples_per_epoch_for_train
  else:
    filenames = [os.path.join(data_dir, 'test_batch.bin')]
    num_examples_per_epoch = num_examples_per_epoch_for_eval

  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = image_size
  width = image_size

  # Image processing for evaluation.
  # Crop the central [height, width] of the image.
  resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                         height, width)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(resized_image)

  # Set the shapes of tensors.
  float_image.set_shape([height, width, 3])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(num_examples_per_epoch *
                           min_fraction_of_examples_in_queue)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=False)

# data augmentation

In [9]:
batch_size = 128 
image_size = 24
num_examples_per_epoch_for_train = 50000
num_examples_per_epoch_for_eval = 10000

# 16 independent threads are used to speed tasks for distorted_in|puts
images_train, labels_train = distorted_inputs(
    data_dir=data_dir, batch_size=batch_size, image_size=image_size,
    num_examples_per_epoch_for_train=num_examples_per_epoch_for_train)

images_test, labels_test = inputs(
    eval_data=True, data_dir=data_dir, batch_size=batch_size, image_size=image_size, 
    num_examples_per_epoch_for_train=num_examples_per_epoch_for_train,
    num_examples_per_epoch_for_eval=num_examples_per_epoch_for_eval)

Filling queue with 20000 CIFAR images before starting to train. This will take a few minutes.


# placeholder

In [10]:
image_holder = tf.placeholder(tf.float32, [batch_size, 24, 24, 3])
label_holder = tf.placeholder(tf.int32, [batch_size])

# build network framework

In [11]:
# 1st layer conv
weight1 = variable_with_weight_loss(shape=[5, 5, 3, 64], stddev=5e-2, wl=0.0)
kernel1 = tf.nn.conv2d(image_holder, weight1, [1, 1, 1, 1], padding='SAME')
bias1 = tf.Variable(tf.constant(0.0, shape=[64]))
conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1))
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)

# 2nd layer conv
weight2 = variable_with_weight_loss(shape=[5, 5, 64, 64], stddev=5e-2, wl=0.0)
kernel2= tf.nn.conv2d(norm1, weight2, [1, 1, 1, 1], padding='SAME')
bias2 = tf.Variable(tf.constant(0.1, shape=[64]))
conv2 = tf.nn.relu(tf.nn.bias_add(kernel2, bias2))
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)
pool2 = tf.nn.max_pool(norm2, ksize=[1 ,3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

# 1st fully connected layer
reshape = tf.reshape(pool2, [batch_size, -1])
dim = reshape.get_shape()[1].value
weight3 = variable_with_weight_loss(shape=[dim, 384], stddev=0.04, wl=0.004)
bias3 = tf.Variable(tf.constant(0.1, shape=[384]))
local3 = tf.nn.relu(tf.matmul(reshape, weight3) + bias3)

# 2nd fully connected layer
weight4 = variable_with_weight_loss(shape=[384, 192], stddev=0.04, wl=0.004)
bias4 = tf.Variable(tf.constant(0.1, shape=[192]))
local4 = tf.nn.relu(tf.matmul(local3, weight4) + bias4)

# 3rd fully connected layer
weight5 = variable_with_weight_loss(shape=[192, 10], stddev=1/192.0, wl=0.0)
bias5 = tf.Variable(tf.constant(0.0, shape=[10]))
# model inference output
logits = tf.add(tf.matmul(local4, weight5), bias5)

# loss

In [12]:
def loss(logits, labels):
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')

loss = loss(logits, label_holder)

# optimization and top accuracy

In [13]:
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
top_k_op = tf.nn.in_top_k(logits, label_holder, 1)

# session

In [14]:
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# start data augmentation threads

In [15]:
tf.train.start_queue_runners()

[<Thread(Thread-4, started daemon 140442725250816)>,
 <Thread(Thread-5, started daemon 140442716858112)>,
 <Thread(Thread-6, started daemon 140442704279296)>,
 <Thread(Thread-7, started daemon 140442695886592)>,
 <Thread(Thread-8, started daemon 140442410678016)>,
 <Thread(Thread-9, started daemon 140442402285312)>,
 <Thread(Thread-10, started daemon 140442393892608)>,
 <Thread(Thread-11, started daemon 140442385499904)>,
 <Thread(Thread-12, started daemon 140442377107200)>,
 <Thread(Thread-13, started daemon 140442368714496)>,
 <Thread(Thread-14, started daemon 140442360321792)>,
 <Thread(Thread-15, started daemon 140441806698240)>,
 <Thread(Thread-16, started daemon 140441798305536)>,
 <Thread(Thread-17, started daemon 140441789912832)>,
 <Thread(Thread-18, started daemon 140441781520128)>,
 <Thread(Thread-19, started daemon 140441773127424)>,
 <Thread(Thread-20, started daemon 140441764734720)>,
 <Thread(Thread-21, started daemon 140441756342016)>,
 <Thread(Thread-22, started daemon

# train

In [16]:
max_steps = 1000
for step in range(max_steps):
    start_time = time.time()
    image_batch, label_batch = sess.run([images_train, labels_train])
    _, loss_value = sess.run([train_op, loss], 
                             feed_dict={image_holder:image_batch, label_holder:label_batch})
    duration = time.time() - start_time
    if step % 10 ==0:
        examples_per_sec = batch_size / duration
        sec_per_batch = float(duration)
        
        format_str=('step %d, loss=%.2f (%.1f examples/sec, %.3f sec/batch)')
        print(format_str % (step, loss_value, examples_per_sec, sec_per_batch))

step 0, loss=4.67 (120.9 examples/sec, 1.059 sec/batch)
step 10, loss=3.73 (200.5 examples/sec, 0.638 sec/batch)
step 20, loss=3.15 (122.5 examples/sec, 1.045 sec/batch)
step 30, loss=2.69 (170.6 examples/sec, 0.750 sec/batch)
step 40, loss=2.60 (188.7 examples/sec, 0.678 sec/batch)
step 50, loss=2.30 (186.9 examples/sec, 0.685 sec/batch)
step 60, loss=2.40 (187.4 examples/sec, 0.683 sec/batch)
step 70, loss=1.98 (186.5 examples/sec, 0.686 sec/batch)
step 80, loss=2.01 (188.9 examples/sec, 0.678 sec/batch)
step 90, loss=1.96 (171.2 examples/sec, 0.748 sec/batch)
step 100, loss=1.88 (170.3 examples/sec, 0.752 sec/batch)
step 110, loss=2.03 (171.6 examples/sec, 0.746 sec/batch)
step 120, loss=1.73 (178.6 examples/sec, 0.717 sec/batch)
step 130, loss=1.96 (189.3 examples/sec, 0.676 sec/batch)
step 140, loss=1.95 (189.1 examples/sec, 0.677 sec/batch)
step 150, loss=1.79 (99.7 examples/sec, 1.284 sec/batch)
step 160, loss=1.78 (180.5 examples/sec, 0.709 sec/batch)
step 170, loss=1.73 (171.6

# evaluation and accuracy

In [17]:
num_examples = 10000
num_iter = int(math.ceil(num_examples / batch_size))
true_count = 0
total_sample_count = num_iter * batch_size
step = 0
while step < num_iter:
    image_batch, label_batch = sess.run([images_test, labels_test])
    # top_k_op: k=1
    predictions = sess.run([top_k_op], feed_dict={
        image_holder: image_batch, label_holder: label_batch})
    true_count += np.sum(predictions)
    step +=1
    
precision = true_count * 1.0 / total_sample_count
print('precision @ top1 = %.3f' % precision)

precision @ top1 = 0.630
