In [None]:
%pylab inline
%install_ext https://raw.github.com/cjdrake/ipython-magic/master/gvmagic.py
%load_ext gvmagic

import tensorflow as tf
# Keep one interactive session open for the whole notebook; later cells call
# `.eval()` / `.run()` without passing a session explicitly.
sess = tf.InteractiveSession()
def myshow(image, label=None):
  """Display `image` on the current axes, rescaled to the [0, 1] range.

  Args:
    image: array-like of pixel values; it is converted to floating point
      and min/max normalized before display.
    label: optional title shown above the image.
  """
  # Dividing by 1.0 yields a floating point copy, so the caller's array is
  # left untouched by the in-place division below.
  image = image/1.0
  image = image - amin(image)
  peak = amax(image)
  # A constant image has a zero range; skip the division rather than
  # producing NaNs from 0/0.
  if peak > 0:
    image /= peak
  axis('off')
  if label:
    title(label)
  imshow(image)

### Image Classification Using TensorFlow

* Task : Classify 32x32 RGB images across 10 categories
  - `airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck`

* Data available at [http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz)

In [None]:
!ls -l data/cifar-10-batches-bin/*_1.bin

* Each file is 10000 images, each 32 * 32 RGB, plus one byte for label
  - $(32 \times 32 \times 3 + 1) \times 10000 = 30730000 \ bytes$

* Read the data

In [None]:
# Read each of the five training batch files fully into memory as raw bytes.
batch_paths = ['data/cifar-10-batches-bin/data_batch_%d.bin' % n
               for n in range(1, 6)]
data = []
for batch_path in batch_paths:
  with open(batch_path, 'rb') as batch_file:
    data.append(batch_file.read())

# Human-readable class names, indexed by the numeric label byte.
label_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

* Decode the raw bytes and create a mini-batch of Tensors

In [None]:
def get_mini_batch(bytes, batch_size):
  """Decode the first `batch_size` records of a raw CIFAR-10 batch.

  Each record is one label byte followed by 32 * 32 * 3 image bytes.

  Args:
    bytes: raw contents of one binary batch file.
    batch_size: number of records to decode, starting at the first one.

  Returns:
    A pair `(images, labels)` of equal-length lists. Each image is a uint8
    Tensor of shape [32, 32, 3]; each label is an int32 Tensor of shape [1].

  Raises:
    ValueError: if `bytes` is too short to hold `batch_size` records.
  """
  img_size = 32 * 32 * 3
  # One 'record' is an image and a label
  record_size = img_size + 1
  required = batch_size * record_size
  # Fail early with a clear message instead of building ops on a truncated
  # slice that would only blow up later inside TensorFlow.
  if required > len(bytes):
    raise ValueError(
      'need %d bytes for %d records, got %d'
      % (required, batch_size, len(bytes)))
  images = []
  labels = []
  for i in range(batch_size):
    offset = i * record_size
    # Slice off a record and decode it in to a Tensor of type uint8
    decoded = tf.decode_raw(bytes[offset:offset + record_size], tf.uint8)
    # Slice off the label
    label =  tf.cast(decoded[0:1], tf.int32)
    # Slice off the image and reshape to 3-D
    image = tf.reshape(decoded[1:1+img_size], [3, 32, 32])
    # Reformat from [channel, row, column] to [row, column, channel]
    image = tf.transpose(image, [1, 2, 0])
    images.append(image)
    labels.append(label)
  return images, labels

* Let's look at the images

In [None]:
# Show the first ten training images in a 2 x 5 grid, titled by class name.
images, labels =  get_mini_batch(data[0], 10)
fig = figure(figsize = (7, 3.5))
for i in range(10):
  image, label = images[i], labels[i]
  # subplot positions are 1-based, so shift the 0-based loop index
  subplot(2, 5, i + 1)
  myshow(image.eval(), label_names[label.eval()[0]])

* Let's augment the data set
  - Extract a random 24 x 24 crop
  - Random flip left-to-right
  - Random brightness/contrast
  - Whiten the image (subtract mean, divide by standard deviation)

In [None]:
def distort(image):
  """Return a randomly augmented, whitened 24 x 24 x 3 version of `image`.

  Applies, in order: random crop, random left-right flip, random
  brightness, random contrast, and per-image whitening.
  """
  cropped = tf.random_crop(image, [24, 24, 3])
  flipped = tf.image.random_flip_left_right(cropped)
  brightened = tf.image.random_brightness(flipped, max_delta=63./255.)
  contrasted = tf.image.random_contrast(brightened, lower=0.2, upper=1.8)
  # Whitening also converts the image to floating point values
  return tf.image.per_image_whitening(contrasted)


* Let's look at the images again

In [None]:
# One row per source image: the original first, then five random distortions.
images, _ =  get_mini_batch(data[3], 5)
figsize(7, 6)
columns = 6
for row, original in enumerate(images):
  first_cell = row * columns + 1
  subplot(5, columns, first_cell)
  myshow(original.eval(), 'original')
  for step in range(1, columns):
    subplot(5, columns, first_cell + step)
    # Each call to distort() builds fresh random ops, so every cell differs.
    myshow(distort(original).eval())


* Let's build a model to classify these images

In [None]:
# Graphviz DOT description of the layer pipeline, kept as a string so the
# `%dotstr` magic can render it.
cifar_model = """
digraph G {
  size="10!";
  rankdir=LR;
  node [shape=box, style="filled, rounded", fillcolor=red];
  image -> conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 -> local3 -> local4 -> softmax;
}
"""

In [None]:
%dotstr cifar_model

In [None]:
# tf.get_variable_scope().reuse_variables()

In [None]:
def conv1(input):
  """First convolution layer: 5x5 kernels mapping 3 channels to 64.

  Creates `weights` and `biases` under the `conv1` variable scope and
  returns the stride-1, SAME-padded convolution of `input` plus the biases.
  """
  with tf.variable_scope('conv1'):
    kernel = tf.get_variable(
      'weights',
      shape=[5, 5, 3, 64],
      initializer=tf.truncated_normal_initializer(stddev=1e-4))
    offset = tf.get_variable(
      'biases',
      shape=[64],
      initializer=tf.constant_initializer(0.0))
  # Only the variables live in the scope; the conv/add ops are built outside.
  convolved = tf.nn.conv2d(input, kernel, [1, 1, 1, 1], padding='SAME')
  return convolved + offset

In [None]:
def pool1(input):
  """Max-pool `input` with a 3x3 window and stride 2 (SAME padding)."""
  window = [1, 3, 3, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(input, ksize=window, strides=step, padding='SAME')

In [None]:
def norm1(input):
  """Apply local response normalization to `input`."""
  lrn_options = dict(bias=1.0, alpha=0.001 / 9.0, beta=0.75)
  return tf.nn.lrn(input, 4, **lrn_options)

In [None]:
def conv2(input):
  """Second convolution layer: 5x5 kernels mapping 64 channels to 64.

  Creates `weights` and `biases` under the `conv2` variable scope and
  returns the stride-1, SAME-padded convolution of `input` plus the biases.
  """
  with tf.variable_scope('conv2'):
    kernel = tf.get_variable(
      'weights',
      shape=[5, 5, 64, 64],
      initializer=tf.truncated_normal_initializer(stddev=1e-4))
    offset = tf.get_variable(
      'biases',
      shape=[64],
      initializer=tf.constant_initializer(0.1))
  # Only the variables live in the scope; the conv/add ops are built outside.
  convolved = tf.nn.conv2d(input, kernel, [1, 1, 1, 1], padding='SAME')
  return convolved + offset

def norm2(input):
  """Apply local response normalization to `input`."""
  lrn_options = dict(bias=1.0, alpha=0.001 / 9.0, beta=0.75)
  return tf.nn.lrn(input, 4, **lrn_options)

def pool2(input):
  """Max-pool `input` with a 3x3 window and stride 2 (SAME padding)."""
  window = [1, 3, 3, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(input, ksize=window, strides=step, padding='SAME')

In [None]:
# Push one distorted image through the convolutional stack and show the
# first channel of the result.
im, _ = get_mini_batch(data[3], 1)
im = distort(im[0])
# Add a leading batch dimension of size 1
im = tf.expand_dims(im, 0)
# Build the graph first: conv1/conv2 create their variables when called, so
# they must exist before the initializer op is constructed and run.
features = pool2(norm2(conv2(norm1(pool1(conv1(im))))))
tf.initialize_all_variables().run()
x = features.eval()
myshow(x[0, :, :, 0])