In [1]:
import tensorflow as tf
import numpy as np
import os
from utils import BatchGenerator
import utils
# from SamplingRNN import SamplingRNNCell
slim = tf.contrib.slim

  from ._conv import register_converters as _register_converters


# Configeration

These are some very basic parameters for constructing training the model. In the future, it would be very helper to test different parameters. 

In [2]:
SEQ_LEN = 10 # this parameter can be changed. TODO: try longer sequences if memory is available.
BATCH_SIZE = 4 # this parameter can also be changed
LEFT_CONTEXT = 5

HEIGHT = 480
WIDTH = 640
CHANNELS = 3

RNN_SIZE = 32
RNN_PROJ = 32

CSV_HEADER = "index, timestamp, width, height, frame_id, filename, angle, torque, speed, lat, long, alt".split(",")
OUTPUTS = CSV_HEADER[-6:-3] # angle,torque,speed
OUTPUT_DIM = len(OUTPUTS) # predict all features

# Loading Data

In [3]:
TRAIN_DS_PATH = "/home/neil/dataset/udacity/main.csv"
TEST_DS_PATH = "/home/neil/dataset/udacity/test.csv"

(train_seq, valid_seq), (mean, std) = utils.process_csv(filename=TRAIN_DS_PATH, val=5)  # concatenated interpolated.csv from rosbags

# interpolated.csv for testset filled with dummy values
test_seq = utils.read_csv(TEST_DS_PATH)

print(len(list(test_seq)))

train sequence length: 41820
val sequence length: 2200
means: [-0.03240883 -0.058797   14.62765503]
standard deviation: [0.38912819 0.7488271  6.4487334 ]
4777


In [4]:
layer_norm = lambda x: tf.contrib.layers.layer_norm(inputs=x, center=True, scale=True, activation_fn=None, trainable=True)

def apply_vision_simple(image, keep_prob, batch_size, seq_len, scope=None, reuse=None):
    video = tf.reshape(image, shape=[batch_size, LEFT_CONTEXT + seq_len, HEIGHT, WIDTH, CHANNELS])
    with tf.variable_scope(scope, 'Vision', [image], reuse=reuse):
            net = slim.convolution(video, num_outputs=64, kernel_size=[3,12,12], stride=[1,6,6], padding="VALID")
            net = tf.nn.dropout(x=net, keep_prob=keep_prob)
            aux1 = slim.fully_connected(tf.reshape(net[:, -seq_len:, :, :, :], [batch_size, seq_len, -1]), 128, activation_fn=None)
            net = slim.convolution(net, num_outputs=64, kernel_size=[2,5,5], stride=[1,2,2], padding="VALID")
            net = tf.nn.dropout(x=net, keep_prob=keep_prob)
            aux2 = slim.fully_connected(tf.reshape(net[:, -seq_len:, :, :, :], [batch_size, seq_len, -1]), 128, activation_fn=None)
            net = slim.convolution(net, num_outputs=64, kernel_size=[2,5,5], stride=[1,1,1], padding="VALID")
            net = tf.nn.dropout(x=net, keep_prob=keep_prob)
            aux3 = slim.fully_connected(tf.reshape(net[:, -seq_len:, :, :, :], [batch_size, seq_len, -1]), 128, activation_fn=None)
            net = slim.convolution(net, num_outputs=64, kernel_size=[2,5,5], stride=[1,1,1], padding="VALID")
            net = tf.nn.dropout(x=net, keep_prob=keep_prob)
            print(net) # TODO must be batch_size x seq_len x ...
            aux4 = slim.fully_connected(tf.reshape(net, [batch_size, seq_len, -1]), 128, activation_fn=None)
            net = slim.fully_connected(tf.reshape(net, [batch_size, seq_len, -1]), 1024, activation_fn=tf.nn.relu)
            net = tf.nn.dropout(x=net, keep_prob=keep_prob)
            net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu)
            net = tf.nn.dropout(x=net, keep_prob=keep_prob)
            net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu)
            net = tf.nn.dropout(x=net, keep_prob=keep_prob)
            net = slim.fully_connected(net, 128, activation_fn=None)
            return layer_norm(tf.nn.elu(net + aux1 + aux2 + aux3 + aux4))
        
        
class SamplingRNNCell(tf.nn.rnn_cell.RNNCell):
    
    """Simple sampling RNN cell."""
    def __init__(self, num_outputs, use_ground_truth, internal_cell, keep_prob):
        """
        if use_ground_truth then don't sample
        """
        self._num_outputs = num_outputs
        self._use_ground_truth = use_ground_truth
        self._internal_cell = internal_cell
        self._keep_prob = keep_prob
  
    @property
    def state_size(self):
        return self._num_outputs, self._internal_cell.state_size # previous output and bottleneck state

    @property
    def output_size(self):
        return self._num_outputs

    def __call__(self, inputs, state, scope=None):
        (visual_feats, current_ground_truth) = inputs
        prev_output, prev_state_internal = state
        # the following is just for a baseline
        context = tf.concat([prev_output, visual_feats], 1)
        new_output_internal, new_state_internal = internal_cell(context, prev_state_internal)
        new_output = tf.contrib.layers.fully_connected(inputs=tf.concat([new_output_internal, prev_output, visual_feats], 1), 
                                                       num_outputs=self._num_outputs, activation_fn=None, scope="OutputProjection")
        return new_output, (current_ground_truth if self._use_ground_truth else new_output, new_state_internal)
    
    

# Building the Graph

In [5]:
graph = tf.Graph()

with graph.as_default():
    # inputs
    learning_rate = tf.placeholder_with_default(input=1e-4, shape=())
    keep_prob = tf.placeholder_with_default(input=1.0, shape=())

    inputs = tf.placeholder(shape=(BATCH_SIZE, LEFT_CONTEXT + SEQ_LEN),
                                    dtype=tf.string)  # pathes to png files from the central camera
    targets = tf.placeholder(shape=(BATCH_SIZE, SEQ_LEN, OUTPUT_DIM),
                                     dtype=tf.float32)  # seq_len x batch_size x OUTPUT_DIM
    targets_normalized = (targets - mean) / std

    input_images = tf.stack([tf.image.decode_png(tf.read_file(x)) for x in 
                             tf.unstack(tf.reshape(inputs, shape=[(LEFT_CONTEXT + SEQ_LEN) * BATCH_SIZE]))])
    input_images = -1.0 + 2.0 * tf.cast(input_images, tf.float32) / 255.0
    input_images.set_shape([(LEFT_CONTEXT + SEQ_LEN) * BATCH_SIZE, HEIGHT, WIDTH, CHANNELS])
    # input_images = tf.identity(input_images, name="input_images")

    visual_conditions_reshaped = apply_vision_simple(image=input_images, keep_prob=keep_prob, 
                                                     batch_size=BATCH_SIZE, seq_len=SEQ_LEN)
    visual_conditions = tf.reshape(visual_conditions_reshaped, [BATCH_SIZE, SEQ_LEN, -1])
    visual_conditions = tf.nn.dropout(x=visual_conditions, keep_prob=keep_prob)

    rnn_inputs_with_ground_truth = (visual_conditions, targets_normalized)
    rnn_inputs_autoregressive = (visual_conditions, tf.zeros(shape=(BATCH_SIZE, SEQ_LEN, OUTPUT_DIM), dtype=tf.float32))

    internal_cell = tf.nn.rnn_cell.LSTMCell(num_units=RNN_SIZE, num_proj=RNN_PROJ)
    cell_with_ground_truth = SamplingRNNCell(num_outputs=OUTPUT_DIM, use_ground_truth=True,
                                             internal_cell=internal_cell, keep_prob=keep_prob)
    cell_autoregressive = SamplingRNNCell(num_outputs=OUTPUT_DIM, use_ground_truth=False,
                                          internal_cell=internal_cell, keep_prob=keep_prob)

    controller_initial_state_variables = utils.get_initial_state(cell_autoregressive.state_size)
    controller_initial_state_autoregressive = utils.deep_copy_initial_state(controller_initial_state_variables)
    controller_initial_state_gt = utils.deep_copy_initial_state(controller_initial_state_variables)

    with tf.variable_scope("predictor"):
        out_gt, controller_final_state_gt = tf.nn.dynamic_rnn(cell=cell_with_ground_truth,
                                                                      inputs=rnn_inputs_with_ground_truth,
                                                                      sequence_length=[SEQ_LEN] * BATCH_SIZE,
                                                                      initial_state=controller_initial_state_gt,
                                                                      dtype=tf.float32,
                                                                      swap_memory=True, time_major=False)
    with tf.variable_scope("predictor", reuse=True):
        out_autoregressive, controller_final_state_autoregressive = tf.nn.dynamic_rnn(cell=cell_autoregressive, 
                                                                                      inputs=rnn_inputs_autoregressive,
                                                                                      sequence_length=[SEQ_LEN] * BATCH_SIZE,
                                                                                      initial_state=controller_initial_state_autoregressive, 
                                                                                      dtype=tf.float32, 
                                                                                      swap_memory=True,
                                                                                      time_major=False)

    mse_gt = tf.reduce_mean(tf.squared_difference(out_gt, targets_normalized))
    mse_autoregressive = tf.reduce_mean(tf.squared_difference(out_autoregressive, targets_normalized))
    mse_autoregressive_steering = tf.reduce_mean(tf.squared_difference(out_autoregressive[:, :, 0], targets_normalized[:, :, 0]))
    steering_predictions = (out_autoregressive[:, :, 0] * std[0]) + mean[0]

    total_loss = mse_autoregressive_steering  # + 0.1 * (mse_gt + mse_autoregressive)

    optimizer = utils.get_optimizer(total_loss, learning_rate)

    tf.summary.scalar("MAIN TRAIN METRIC: rmse_autoregressive_steering", tf.sqrt(mse_autoregressive_steering))
    tf.summary.scalar("rmse_gt", tf.sqrt(mse_gt))
    tf.summary.scalar("rmse_autoregressive", tf.sqrt(mse_autoregressive))

    summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('v3/train_summary', graph=graph)
    valid_writer = tf.summary.FileWriter('v3/valid_summary', graph=graph)
    saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)

for n in tf.get_default_graph().as_graph_def().node:
    print(hello)
    print(n.name)


Tensor("Vision/dropout_3/mul:0", shape=(4, 10, 30, 43, 64), dtype=float32)
[u'Vision/Conv/weights:0', u'Vision/Conv/biases:0', u'Vision/fully_connected/weights:0', u'Vision/fully_connected/biases:0', u'Vision/Conv_1/weights:0', u'Vision/Conv_1/biases:0', u'Vision/fully_connected_1/weights:0', u'Vision/fully_connected_1/biases:0', u'Vision/Conv_2/weights:0', u'Vision/Conv_2/biases:0', u'Vision/fully_connected_2/weights:0', u'Vision/fully_connected_2/biases:0', u'Vision/Conv_3/weights:0', u'Vision/Conv_3/biases:0', u'Vision/fully_connected_3/weights:0', u'Vision/fully_connected_3/biases:0', u'Vision/fully_connected_4/weights:0', u'Vision/fully_connected_4/biases:0', u'Vision/fully_connected_5/weights:0', u'Vision/fully_connected_5/biases:0', u'Vision/fully_connected_6/weights:0', u'Vision/fully_connected_6/biases:0', u'Vision/fully_connected_7/weights:0', u'Vision/fully_connected_7/biases:0', u'Vision/LayerNorm/beta:0', u'Vision/LayerNorm/gamma:0', u'controller_initial_state_0:0', u'cont

# Pre-training

In [6]:
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)

checkpoint_dir = os.getcwd() + "/v3"

global_train_step = 0
global_valid_step = 0
global_valid_predictions = {}

KEEP_PROB_TRAIN = 0.25

def do_epoch(session, sequences, mode):
    
    global global_train_step, global_valid_step, global_valid_predictions
    test_predictions = {}
    batch_generator = BatchGenerator(sequence=sequences, seq_len=SEQ_LEN, batch_size=BATCH_SIZE)
    total_num_steps = int(1 + (batch_generator.indices[1] - 1) / SEQ_LEN)
    if mode == "valid":
        total_num_steps = 150
    controller_final_state_gt_cur, controller_final_state_autoregressive_cur = None, None
    acc_loss = np.float128(0.0)
    
    for step in range(total_num_steps):
        
        feed_inputs, feed_targets = batch_generator.next()
        feed_dict = {inputs : feed_inputs, targets : feed_targets}
        if controller_final_state_autoregressive_cur is not None:
            feed_dict.update({controller_initial_state_autoregressive : controller_final_state_autoregressive_cur})
        if controller_final_state_gt_cur is not None:
            feed_dict.update({controller_final_state_gt : controller_final_state_gt_cur})
        
        if mode == "train":
            feed_dict.update({keep_prob : KEEP_PROB_TRAIN})
            summary, _, loss, controller_final_state_gt_cur, controller_final_state_autoregressive_cur = \
                session.run([summaries, optimizer, mse_autoregressive_steering, controller_final_state_gt, controller_final_state_autoregressive],
                           feed_dict = feed_dict)
            train_writer.add_summary(summary, global_train_step)
            global_train_step += 1
        
        elif mode == "valid":
            model_predictions, summary, loss, controller_final_state_autoregressive_cur = \
                session.run([steering_predictions, summaries, mse_autoregressive_steering, controller_final_state_autoregressive],
                           feed_dict = feed_dict)
            valid_writer.add_summary(summary, global_valid_step)
            global_valid_step += 1
            
            feed_inputs = feed_inputs[:, LEFT_CONTEXT:].flatten()
            steering_targets = feed_targets[:, :, 0].flatten()
            model_predictions = model_predictions.flatten()
            stats = np.stack([steering_targets, model_predictions, (steering_targets - model_predictions)**2])
            for i, img in enumerate(feed_inputs):
                global_valid_predictions[img] = stats[:, i]
        
        elif mode == "test":
            model_predictions, controller_final_state_autoregressive_cur = \
                session.run([steering_predictions, controller_final_state_autoregressive],
                           feed_dict = feed_dict)           
            feed_inputs = feed_inputs[:, LEFT_CONTEXT:].flatten()
            model_predictions = model_predictions.flatten()
            for i, img in enumerate(feed_inputs):
                test_predictions[img] = model_predictions[i]
        
        if mode != "test":
            acc_loss += loss
            if (step + 1) % 10 == 0:
                print(str(step + 1) + "/" + str(total_num_steps) + " " + str(np.sqrt(acc_loss / (step+1))))

    return np.sqrt(acc_loss / total_num_steps) if mode != "test" else test_predictions
            

# Training

In [7]:
NUM_EPOCHS=10

best_validation_score = None

with tf.Session(graph=graph, config=tf.ConfigProto(gpu_options=gpu_options)) as session:
    session.run(tf.initialize_all_variables())
    print('Initialized')
    ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    
    if ckpt:
        print("Restoring from", ckpt)
        saver.restore(sess=session, save_path=ckpt)
    
    for epoch in range(NUM_EPOCHS):
        print("Starting epoch %d" % epoch)
        print("Validation:")
        valid_score = do_epoch(session=session, sequences=valid_seq, mode="valid")
        if best_validation_score is None: 
            best_validation_score = valid_score
            with open("v3/test-predictions-epoch%d" % epoch, "w") as out:
                test_predictions = do_epoch(session=session, sequences=test_seq, mode="test")
                # print >> out, "frame_id,steering_angle"
                for img, pred in test_predictions.items():
                    img = img.replace("challenge_2/Test-final/center/", "")
                    # print >> out, "%s,%f" % (img, pred)
        
        if valid_score < best_validation_score:
            saver.save(session, 'v3/checkpoint-sdc-ch2')
            best_validation_score = valid_score
            print("SAVED at epoch %d" % epoch) 
            with open("v3/valid-predictions-epoch%d" % epoch, "w") as out:
                result = np.float128(0.0)
                for img, stats in global_valid_predictions.items():
                    # print >> out, img, stats
                    result += stats[-1]
            print("Validation unnormalized RMSE:", np.sqrt(result / len(global_valid_predictions)))
            with open("v3/test-predictions-epoch%d" % epoch, "w") as out:
                test_predictions = do_epoch(session=session, sequences=test_seq, mode="test")
                # print >> out, "frame_id,steering_angle"
                for img, pred in test_predictions.items():
                    img = img.replace("challenge_2/Test-final/center/", "")
                    # print >> out, "%s,%f" % (img, pred)
        if epoch != NUM_EPOCHS - 1:
            print("Training")
            do_epoch(session=session, sequences=train_seq, mode="train")

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Initialized
('Restoring from', u'/home/neil/Workspace/self-driving-golf-cart/src/steering_control/scripts/jupyter/v3/checkpoint-sdc-ch2')
INFO:tensorflow:Restoring parameters from /home/neil/Workspace/self-driving-golf-cart/src/steering_control/scripts/jupyter/v3/checkpoint-sdc-ch2
Starting epoch 0
Validation:
10/150 0.47908098493100445634
20/150 0.42037870885396545114
30/150 0.5145103498554882085
40/150 0.5766859113415133317
50/150 0.5890816466612050701
60/150 0.56736453087615999774
70/150 0.5499879189540764037
80/150 0.52848484655602881113
90/150 0.55980070525823668313
100/150 0.5825234626451896705
110/150 0.5737444865065625622
120/150 0.5665473544361709029
130/150 0.5530077227255476171
140/150 0.5616528041686762808
150/150 0.57459888130994673047
Training


ResourceExhaustedError: OOM when allocating tensor with shape[82560,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: gradients/Vision/fully_connected_4/Tensordot/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Vision/fully_connected_4/Tensordot/Reshape, gradients/Vision/fully_connected_4/Tensordot_grad/Reshape)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op u'gradients/Vision/fully_connected_4/Tensordot/MatMul_grad/MatMul_1', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 1008, in start
    self._run_callback(self._callbacks.popleft())
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 759, in _run_callback
    ret = callback()
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2714, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2818, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2878, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-6ba41c422985>", line 61, in <module>
    optimizer = utils.get_optimizer(total_loss, learning_rate)
  File "utils.py", line 114, in get_optimizer
    gradvars = optimizer.compute_gradients(loss)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 526, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 494, in gradients
    gate_gradients, aggregation_method, stop_gradients)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 636, in _GradientsHelper
    lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 385, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 636, in <lambda>
    lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_grad.py", line 1047, in _MatMulGrad
    grad_b = gen_math_ops.mat_mul(a, grad, transpose_a=True)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 4279, in mat_mul
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op u'Vision/fully_connected_4/Tensordot/MatMul', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
[elided 19 identical lines from previous traceback]
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2878, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-6ba41c422985>", line 21, in <module>
    batch_size=BATCH_SIZE, seq_len=SEQ_LEN)
  File "<ipython-input-4-202f51a1dafa>", line 19, in apply_vision_simple
    net = slim.fully_connected(tf.reshape(net, [batch_size, seq_len, -1]), 1024, activation_fn=tf.nn.relu)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1716, in fully_connected
    outputs = layer.apply(inputs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 828, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 717, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/core.py", line 157, in call
    [0]])
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 3004, in tensordot
    ab_matmul = matmul(a_reshape, b_reshape)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 2122, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 4279, in mat_mul
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[82560,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: gradients/Vision/fully_connected_4/Tensordot/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Vision/fully_connected_4/Tensordot/Reshape, gradients/Vision/fully_connected_4/Tensordot_grad/Reshape)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

