In [12]:
import tensorflow as tf
import numpy as np
import math

In [10]:
import util

address = '../data/'
file_names = {}
file_names['images'] = 'small_data.npy'
file_names['subs'] = 'small_subredditlabels'
file_names['dict'] = 'small_subredditIndex'
X_train, y_train, X_val, y_val, X_test, y_test, dictionary = util.import_dataset(address, file_names)

## Inception

In [2]:
# Refer to original paper (https://arxiv.org/pdf/1502.03167.pdf) for details
# strided boolean refers to whether the incception module employs stride-2 conv/pool layers, along with a pass through 
# projection layer, as referenced in the paper above (inception modules 3c and 4e)
def Inception(input_layer, num_1x1, num_3x3_reduce, num_3x3, num_double_3x3_reduce, num_double_3x3, pool_type, proj_size, strided):
    strides = [2, 2] if strided else [1, 1]  # last layer strides (before concatenation)
    
    inception_1_conv1 = tf.layers.conv2d(input_layer, num_1x1, [1, 1], strides=strides, padding="SAME")
    if num_1x1 > 0:
        inception_1_bn1 = tf.layers.batch_normalization(inception_1_conv1, training=is_training)
        inception_1 = tf.nn.relu(inception_1_bn1)
    else:
        inception_1 = inception_1_conv1  # tensor with no elements
    
    inception_2_conv1 = tf.layers.conv2d(input_layer, num_3x3_reduce, [1, 1], strides=[1, 1], padding="SAME", activation=tf.nn.relu)
    inception_2_conv2 = tf.layers.conv2d(inception_2_conv1, num_3x3, [3, 3], strides=strides, padding="SAME")
    inception_2_bn1 = tf.layers.batch_normalization(inception_2_conv2, training=is_training)
    inception_2 = tf.nn.relu(inception_2_bn1)
    
    inception_3_conv1 = tf.layers.conv2d(input_layer, 64, [1, 1], strides=[1, 1], padding="SAME", activation=tf.nn.relu)
    inception_3_conv2 = tf.layers.conv2d(inception_3_conv1, 96, [3, 3], strides=[1, 1], padding="SAME", activation=tf.nn.relu)
    inception_3_conv3 = tf.layers.conv2d(inception_3_conv2, 96, [3, 3], strides=strides, padding="SAME")
    inception_3_bn1 = tf.layers.batch_normalization(inception_3_conv3, training=is_training)
    inception_3 = tf.nn.relu(inception_3_bn1)
    
    inception_4_pool1 = tf.nn.pool(input_layer, [3, 3], pool_type, "SAME", strides=strides)
    if proj_size == 0:
        inception_4 = tf.nn.relu(inception_4_pool1)  # pass through layer if proj_size is 0
    else:
        inception_4_conv1 = tf.layers.conv2d(inception_4_pool1, proj_size, [1, 1], padding="SAME")
        inception_4_bn1 = tf.layers.batch_normalization(inception_4_conv1, training=is_training)
        inception_4 = tf.nn.relu(inception_4_bn1)
    
    inception_out = tf.concat([inception_1, inception_2, inception_3, inception_4], -1)
    return inception_out

## GoogleNet

In [6]:
def GoogleNet(X, is_training, keep_prob=1.0):
    conv_1 = tf.layers.conv2d(X, 64, [7, 7], strides=[2, 2], padding="SAME", activation=tf.nn.relu)
    pool_1 = tf.nn.max_pool(conv_1, [1, 3, 3, 1], [1, 2, 2, 1], "SAME")
    norm_1 = tf.layers.batch_normalization(pool_1, training=is_training)
    conv_2 = tf.layers.conv2d(norm_1, 192, [3, 3], strides=[1, 1], padding="SAME", activation=tf.nn.relu)
    pool_2 = tf.nn.max_pool(conv_2, [1, 3, 3, 1], [1, 2, 2, 1], "SAME")
    norm_2 = tf.layers.batch_normalization(pool_2, training=is_training)
    
    inception_1a = Inception(norm_2, 64, 64, 64, 64, 96, "AVG", 32, False)
    inception_1b = Inception(inception_1a, 64, 64, 96, 64, 96, "AVG", 64, False)
    inception_1c = Inception(inception_1b, 0, 128, 160, 64, 96, "MAX", 0, True)
    inception_2a = Inception(inception_1c, 224, 64, 96, 96, 128, "AVG", 128, False)
    inception_2b = Inception(inception_2a, 192, 96, 128, 96, 128, "AVG", 128, False)
    inception_2c = Inception(inception_2b, 160, 128, 160, 128, 160, "AVG", 128, False)
    inception_2d = Inception(inception_2c, 96, 128, 192, 160, 192, "AVG", 128, False)
    inception_2e = Inception(inception_2d, 0, 128, 192, 192, 256, "MAX", 0, True)
    inception_3a = Inception(inception_2e, 352, 192, 320, 160, 224, "AVG", 128, False)
    inception_3b = Inception(inception_3a, 352, 192, 320, 192, 224, "MAX", 128, False)
    
    # The following pooling size is changed from the original paper due to different starting image sizes
    pool_3 = tf.nn.pool(inception_3b, [4, 4], "AVG", "VALID", strides=[1, 1])
    if keep_prob < 1.0:
        pool_3 = tf.nn.dropout(pool_3, keep_prob)
    y_out = tf.layers.dense(pool_3, 20)
    return y_out[:, 0, 0, :]

# Now we're going to feed a random batch into the model 
# and make sure the output is the right size
X = tf.placeholder(tf.float32, [None, 128, 128, 3])
y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)

y_out = GoogleNet(X, is_training)
total_loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(y, 20), logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# define our optimizer
optimizer = tf.train.AdamOptimizer(5e-4) # select optimizer and set learning rate
train_step = optimizer.minimize(mean_loss)

# batch normalization in tensorflow requires this extra dependency
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(mean_loss)

## Sanity Check

In [7]:
x = np.random.randn(64, 128, 128, 3)
with tf.Session() as sess:
    with tf.device("/cpu:0"): #"/cpu:0" or "/gpu:0"
        tf.global_variables_initializer().run()

        ans = sess.run(y_out,feed_dict={X:x,is_training:True})
        print(ans.shape)
        print(np.array_equal(ans.shape, np.array([64, 20])))

(64, 20)
True


## Run the Model

In [8]:
# Run the model returning total loss and total correct
def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=32, print_every=100,
              training=None, plot_losses=False):
    
    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict,1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # shuffle indicies
    train_indicies = np.arange(Xd.shape[0])
    np.random.shuffle(train_indicies)

    training_now = training is not None
    
    # setting up variables we want to compute (and optimizing)
    # if we have a training function, add that to things we compute
    variables = [mean_loss,correct_prediction,accuracy]
    if training_now:
        variables[-1] = training
    
    # counter 
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0]/batch_size))):
            # generate indicies for the batch
            start_idx = (i*batch_size)%Xd.shape[0]
            idx = train_indicies[start_idx:start_idx+batch_size]
            
            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx,:],
                         y: yd[idx],
                         is_training: training_now }
            # get batch size
            actual_batch_size = yd[idx].shape[0]
            
            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables,feed_dict=feed_dict)
            
            # aggregate performance stats
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)
            
            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt,loss,np.sum(corr)/actual_batch_size))
            iter_cnt += 1
        total_correct = correct/Xd.shape[0]
        total_loss = np.sum(losses)/Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e+1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss,total_correct

In [13]:
sess = tf.Session()

sess.run(tf.global_variables_initializer())
print('Training')
run_model(sess,y_out,mean_loss,X_train,y_train,1,32,1,train_step,True)
print('Validation')
run_model(sess,y_out,mean_loss,X_val,y_val,1,32)

Training


InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
	 [[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op u'Placeholder', defined at:
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/Library/Python/2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Library/Python/2.7/site-packages/traitlets/config/application.py", line 592, in launch_instance
    app.start()
  File "/Library/Python/2.7/site-packages/ipykernel/kernelapp.py", line 403, in start
    ioloop.IOLoop.instance().start()
  File "/Library/Python/2.7/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Library/Python/2.7/site-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/Library/Python/2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Python/2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Library/Python/2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Library/Python/2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Library/Python/2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Python/2.7/site-packages/ipykernel/kernelbase.py", line 260, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Library/Python/2.7/site-packages/ipykernel/kernelbase.py", line 212, in dispatch_shell
    handler(stream, idents, msg)
  File "/Library/Python/2.7/site-packages/ipykernel/kernelbase.py", line 370, in execute_request
    user_expressions, allow_stdin)
  File "/Library/Python/2.7/site-packages/ipykernel/ipkernel.py", line 175, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Library/Python/2.7/site-packages/IPython/core/interactiveshell.py", line 2902, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Library/Python/2.7/site-packages/IPython/core/interactiveshell.py", line 3006, in run_ast_nodes
    if self.run_code(code, result):
  File "/Library/Python/2.7/site-packages/IPython/core/interactiveshell.py", line 3066, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-bb843b1ded95>", line 29, in <module>
    X = tf.placeholder(tf.float32, [None, 128, 128, 3])
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/array_ops.py", line 1507, in placeholder
    name=name)
  File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1997, in _placeholder
    name=name)
  File "/Library/Python/2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float
	 [[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
