In [1]:
import cifar10, cifar10_input
import tensorflow as tf
import numpy as np
import time
import math

# define weight function

In [2]:
def variable_with_weight_loss(shape, stddev, wl):
    """Create a weight variable and optionally register an L2 penalty for it.

    The variable is initialised from a truncated normal distribution with the
    given standard deviation. Unless ``wl`` is ``None``, the term
    ``wl * tf.nn.l2_loss(var)`` is appended to the graph collection
    ``'losses'``.

    Args:
        shape: Shape of the variable to create.
        stddev: Standard deviation of the truncated-normal initialiser.
        wl: Multiplier applied to the L2 penalty, or ``None`` to register
            nothing. Note that ``0.0`` is not ``None``, so it still registers
            a term (scaled by zero).

    Returns:
        The newly created ``tf.Variable``.
    """
    initial_value = tf.truncated_normal(shape, stddev=stddev)
    var = tf.Variable(initial_value)
    if wl is None:
        return var
    penalty = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
    tf.add_to_collection('losses', penalty)
    return var

# download and load data

In [3]:
# Directory handed to the cifar10_input readers below as the location of the
# CIFAR-10 binary batch files.
data_dir = '/tmp/cifar10_data/cifar-10-batches-bin'
# Helper from the cifar10 module; presumably fetches and unpacks the dataset
# only when it is not already on disk -- confirm against cifar10.py.
cifar10.maybe_download_and_extract()

# data augmentation

In [4]:
# Number of examples per mini-batch; also fixes the static batch dimension of
# the placeholders and the reshape in the network definition.
batch_size = 128 
# Training pipeline: distorted_inputs produces augmented (distorted) image
# batches. The original note says 16 independent threads are used to speed up
# this preprocessing -- TODO confirm in cifar10_input.
images_train, labels_train = cifar10_input.distorted_inputs(data_dir=data_dir, batch_size=batch_size)
# Evaluation pipeline: eval_data=True selects the evaluation split.
images_test, labels_test = cifar10_input.inputs(eval_data=True, data_dir=data_dir, batch_size=batch_size)

Filling queue with 20000 CIFAR images before starting to train. This will take a few minutes.


# placeholder

In [5]:
# Feed points for one mini-batch. The batch dimension is fixed to batch_size
# (not None) because the network later reshapes with an explicit batch size.
image_holder = tf.placeholder(dtype=tf.float32, shape=[batch_size, 24, 24, 3])
label_holder = tf.placeholder(dtype=tf.int32, shape=[batch_size])

# build network framework

In [6]:
# ---------------------------------------------------------------------------
# Conv block 1: 5x5 convolution (3 -> 64 channels), ReLU, 3x3/stride-2
# max-pool, then local response normalisation. No L2 penalty (wl=0.0).
# ---------------------------------------------------------------------------
weight1 = variable_with_weight_loss(
    shape=[5, 5, 3, 64], stddev=0.05, wl=0.0)
kernel1 = tf.nn.conv2d(
    image_holder, weight1, strides=[1, 1, 1, 1], padding='SAME')
bias1 = tf.Variable(tf.constant(0.0, shape=[64]))
conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1))
pool1 = tf.nn.max_pool(
    conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
norm1 = tf.nn.lrn(
    pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

# ---------------------------------------------------------------------------
# Conv block 2: 5x5 convolution (64 -> 64 channels), ReLU, then LRN *before*
# the max-pool (the reverse of block 1). Biases start at 0.1 here.
# ---------------------------------------------------------------------------
weight2 = variable_with_weight_loss(
    shape=[5, 5, 64, 64], stddev=0.05, wl=0.0)
kernel2 = tf.nn.conv2d(
    norm1, weight2, strides=[1, 1, 1, 1], padding='SAME')
bias2 = tf.Variable(tf.constant(0.1, shape=[64]))
conv2 = tf.nn.relu(tf.nn.bias_add(kernel2, bias2))
norm2 = tf.nn.lrn(
    conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
pool2 = tf.nn.max_pool(
    norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

# ---------------------------------------------------------------------------
# Fully connected layer 1: flatten each example to a vector, then project to
# 384 units with ReLU. This layer carries an L2 weight penalty (wl=0.004).
# ---------------------------------------------------------------------------
reshape = tf.reshape(pool2, [batch_size, -1])
# Static size of the flattened feature axis, read from the inferred shape.
dim = reshape.get_shape().as_list()[1]
weight3 = variable_with_weight_loss(
    shape=[dim, 384], stddev=0.04, wl=0.004)
bias3 = tf.Variable(tf.constant(0.1, shape=[384]))
local3 = tf.nn.relu(tf.matmul(reshape, weight3) + bias3)

# ---------------------------------------------------------------------------
# Fully connected layer 2: 384 -> 192 units with ReLU, same L2 penalty.
# ---------------------------------------------------------------------------
weight4 = variable_with_weight_loss(
    shape=[384, 192], stddev=0.04, wl=0.004)
bias4 = tf.Variable(tf.constant(0.1, shape=[192]))
local4 = tf.nn.relu(tf.matmul(local3, weight4) + bias4)

# ---------------------------------------------------------------------------
# Output layer: linear projection 192 -> 10 class scores. No activation is
# applied here; the raw scores are the model's inference output.
# ---------------------------------------------------------------------------
weight5 = variable_with_weight_loss(
    shape=[192, 10], stddev=1.0 / 192, wl=0.0)
bias5 = tf.Variable(tf.constant(0.0, shape=[10]))
logits = tf.add(tf.matmul(local4, weight5), bias5)

# loss

In [7]:
def compute_total_loss(logits, labels):
    """Build the total training loss for a batch.

    The mean sparse softmax cross-entropy of ``logits`` against ``labels`` is
    added to the ``'losses'`` graph collection, and the sum of everything in
    that collection (cross-entropy plus any previously registered weight
    penalties) is returned.

    Args:
        logits: Unnormalised class scores, one row per example.
        labels: Integer class ids, one per example; cast to int64 here.

    Returns:
        A scalar tensor named ``'total_loss'``.
    """
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


# The helper deliberately has a different name from the resulting tensor:
# binding the tensor to the function's own name would shadow the function and
# make this cell fail ("'Tensor' object is not callable") when run again.
loss = compute_total_loss(logits, label_holder)

# optimization and top accuracy

In [8]:
# One Adam update step that minimises the total loss.
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
# Per-example boolean: is the true label the single highest-scoring class?
top_k_op = tf.nn.in_top_k(predictions=logits, targets=label_holder, k=1)

# session

In [9]:
# InteractiveSession installs itself as the default session on construction.
sess = tf.InteractiveSession()
# Initialise every variable created so far in the graph.
sess.run(tf.global_variables_initializer())

# start data augmentation threads

In [10]:
# Launch the background threads that feed the input queues; without them the
# sess.run calls on the image/label batches would block waiting for data.
# The returned list of threads is displayed as the cell output.
tf.train.start_queue_runners(sess=sess)

[<Thread(Thread-4, started daemon 139795516880640)>,
 <Thread(Thread-5, started daemon 139795508487936)>,
 <Thread(Thread-6, started daemon 139795495909120)>,
 <Thread(Thread-7, started daemon 139794743031552)>,
 <Thread(Thread-8, started daemon 139794734638848)>,
 <Thread(Thread-9, started daemon 139794726246144)>,
 <Thread(Thread-10, started daemon 139794717853440)>,
 <Thread(Thread-11, started daemon 139794709460736)>,
 <Thread(Thread-12, started daemon 139794701068032)>,
 <Thread(Thread-13, started daemon 139794692675328)>,
 <Thread(Thread-14, started daemon 139794206160640)>,
 <Thread(Thread-15, started daemon 139794197767936)>,
 <Thread(Thread-16, started daemon 139794189375232)>,
 <Thread(Thread-17, started daemon 139794180982528)>,
 <Thread(Thread-18, started daemon 139794172589824)>,
 <Thread(Thread-19, started daemon 139794164197120)>,
 <Thread(Thread-20, started daemon 139794155804416)>,
 <Thread(Thread-21, started daemon 139793937725184)>,
 <Thread(Thread-22, started daemon

# train

In [11]:
max_steps = 1000
for step in range(max_steps):
    start_time = time.time()
    # Pull the next training mini-batch out of the input pipeline ...
    image_batch, label_batch = sess.run([images_train, labels_train])
    # ... and run a single optimisation step on it, fetching the loss value.
    _, loss_value = sess.run(
        [train_op, loss],
        feed_dict={image_holder: image_batch, label_holder: label_batch}
    )
    # The measured time covers both the batch fetch and the training step.
    duration = time.time() - start_time

    # Report progress on every 10th step only.
    if step % 10 != 0:
        continue
    examples_per_sec = batch_size / duration
    sec_per_batch = float(duration)
    format_str = 'step %d, loss=%.2f (%.1f examples/sec, %.3f sec/batch)'
    print(format_str % (step, loss_value, examples_per_sec, sec_per_batch))

step 0, loss=4.68 (79.9 examples/sec, 1.603 sec/batch)
step 10, loss=3.77 (199.4 examples/sec, 0.642 sec/batch)
step 20, loss=3.05 (198.5 examples/sec, 0.645 sec/batch)
step 30, loss=2.66 (187.2 examples/sec, 0.684 sec/batch)
step 40, loss=2.35 (187.3 examples/sec, 0.683 sec/batch)
step 50, loss=2.25 (170.6 examples/sec, 0.750 sec/batch)
step 60, loss=2.13 (199.6 examples/sec, 0.641 sec/batch)
step 70, loss=2.00 (152.5 examples/sec, 0.839 sec/batch)
step 80, loss=2.00 (187.5 examples/sec, 0.683 sec/batch)
step 90, loss=1.89 (172.7 examples/sec, 0.741 sec/batch)
step 100, loss=1.95 (170.4 examples/sec, 0.751 sec/batch)
step 110, loss=1.88 (197.9 examples/sec, 0.647 sec/batch)
step 120, loss=1.96 (119.3 examples/sec, 1.073 sec/batch)
step 130, loss=1.86 (188.4 examples/sec, 0.679 sec/batch)
step 140, loss=1.74 (185.7 examples/sec, 0.689 sec/batch)
step 150, loss=1.80 (170.2 examples/sec, 0.752 sec/batch)
step 160, loss=1.77 (171.0 examples/sec, 0.749 sec/batch)
step 170, loss=1.82 (169.7

# evaluation and accuracy

In [12]:
# Number of examples to evaluate on.
num_examples = 10000
# Ceiling division done purely in integers. The previous
# math.ceil(num_examples / batch_size) truncated *before* the ceil under
# Python 2 (no `from __future__ import division` in this notebook), silently
# evaluating one batch too few; this form gives the same count on 2 and 3.
num_iter = (num_examples + batch_size - 1) // batch_size
true_count = 0
# Because whole batches are evaluated, the denominator is the number of
# samples actually scored (num_iter * batch_size), which can exceed
# num_examples when batch_size does not divide it evenly.
total_sample_count = num_iter * batch_size
for step in range(num_iter):
    image_batch, label_batch = sess.run([images_test, labels_test])
    # top_k_op (k=1) yields one boolean per example: True when the top
    # prediction matches the label. Fetch the tensor directly rather than
    # wrapping it in a one-element list.
    predictions = sess.run(
        top_k_op,
        feed_dict={image_holder: image_batch, label_holder: label_batch})
    true_count += np.sum(predictions)

precision = true_count * 1.0 / total_sample_count
print('precision @ top1 = %.3f' % precision)

precision @ top1 = 0.618
