In [10]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST; labels are returned one-hot encoded.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Session configuration.
# NOTE: the CPU-only override (device_count={'GPU': 0}) is not active here, so
# the GPU is still used; the GPU options below are what is actually set in
# response to the GPU OOM issue.
config = tf.ConfigProto(
    allow_soft_placement=True,
    log_device_placement=True,
    gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.5,  # upper bound on the share of GPU memory used
        allow_growth=True,                    # grow the allocation on demand
        allocator_type='BFC',
    ),
)
sess = tf.InteractiveSession(config=config)

# Graph inputs: flattened 28 * 28 images and their one-hot labels (10 classes).
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
# Turn each flat 784-vector back into a 28 x 28 x 1 grayscale cube.
x_image = tf.reshape(x, [-1, 28, 28, 1], name="x_image")

def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal.

    The initial values use a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Apply a 2-D convolution of `x` with filter `W`.

    Uses a stride of 1 in every dimension and 'SAME' padding.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 kernel, a 2x2 stride and 'SAME' padding."""
    window = [1, 2, 2, 1]  # used for both the kernel size (ksize) and the strides
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Define layers: two convolution + max-pool stages, one fully connected layer
# with dropout, and a 10-way readout. Statement order is kept as-is because it
# determines the auto-generated op names in the graph.

# First convolutional layer: 5x5 filters over the 1-channel grayscale input,
# producing 32 features.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Convolution + bias, ReLU activation, then 2x2 max pooling.
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# Second convolutional layer: 5x5 filters, 32 input features -> 64 features.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# Fully connected layer: the 7x7 maps with 64 features each are mapped to
# 1024 neurons.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
# Flatten the output of the second pooling stage so it can be fed to the
# fully connected layer.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# Dropout: keep_prob is a placeholder so it can be fed a different value for
# training than for evaluation.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Readout layer: 1024 neurons -> 10 class scores.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# Model output: unnormalised class scores (logits); no softmax is applied here.
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Loss: softmax cross-entropy between the logits and the labels, averaged
# over the batch.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv,labels=y_))

# Optimisation: one Adam step (learning rate 1e-4) minimising the loss.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Per-example correctness: the arg-max of the logits equals the arg-max of
# the label vector.
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
# Accuracy: mean of the correctness flags cast to float32.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initialise all variables created above before any training or evaluation.
sess.run(tf.global_variables_initializer())

#Train
# Runs `num_steps` optimisation steps on mini-batches of the training set and
# reports progress every `display_every` steps.
import time

num_steps = 20000
display_every = 100
train_batch_size = 50   # examples per optimisation step
train_keep_prob = 0.5   # dropout keep probability used while training

#start timer
start_time = time.time()

for i in range(num_steps):
    batch = mnist.train.next_batch(train_batch_size)
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: train_keep_prob})

    if i % display_every == 0:
        # Dropout is disabled (keep_prob 1.0) for the measurement. The accuracy
        # is computed on the batch that was just used for the update above.
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        end_time = time.time()
        print("step {0}, elapsed time {1:.2f} seconds, training accuracy {2:.3f}%".format(i, end_time - start_time, train_accuracy*100))

#Display summary
# Report `num_steps` rather than the loop variable: `i` is undefined (or stale
# from an earlier run) when the loop body never executes.
end_time = time.time()
print("Total training time for {0} batches: {1:.2f} seconds".format(num_steps, end_time-start_time))

# Accuracy on test data
# Feeding the whole test set (mnist.test.images, 10000 x 784) in a single batch
# makes the first convolution allocate a [10000, 32, 28, 28] tensor and fails
# with an OOM error, so the test set is evaluated in small batches instead.
batch_size = 50
num_test_examples = mnist.test.num_examples
# Ceiling division so a trailing partial batch is evaluated, not dropped.
batch_num = (num_test_examples + batch_size - 1) // batch_size
correct_total = 0.0

try:
    for i in range(batch_num):
        # Slice the arrays directly: deterministic order, every example is
        # visited exactly once, independent of next_batch()'s internal cursor.
        lo = i * batch_size
        hi = lo + batch_size
        batch = (mnist.test.images[lo:hi], mnist.test.labels[lo:hi])
        batch_accuracy = accuracy.eval(feed_dict={x: batch[0],
                                                  y_: batch[1],
                                                  keep_prob: 1.0})
        # Weight by the real batch length so a short final batch counts
        # proportionally.
        correct_total += batch_accuracy * len(batch[0])

    # Guard against an empty test set instead of dividing by zero.
    test_accuracy = correct_total / num_test_examples if num_test_examples else 0.0
    print("Test accuracy {0:.3f}%".format(test_accuracy*100.0))
finally:
    # Always release the session (and its GPU memory), even if evaluation fails.
    sess.close()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
step 0, elapsed time 0.34 seconds, training accuracy 36.000%
step 100, elapsed time 1.07 seconds, training accuracy 84.000%
step 200, elapsed time 1.79 seconds, training accuracy 86.000%
step 300, elapsed time 2.50 seconds, training accuracy 90.000%
step 400, elapsed time 3.21 seconds, training accuracy 94.000%
step 500, elapsed time 3.91 seconds, training accuracy 94.000%
step 600, elapsed time 4.62 seconds, training accuracy 98.000%
step 700, elapsed time 5.33 seconds, training accuracy 100.000%
step 800, elapsed time 6.03 seconds, training accuracy 92.000%
step 900, elapsed time 6.76 seconds, training accuracy 98.000%
step 1000, elapsed time 7.46 seconds, training accuracy 100.000%
step 1100, elapsed time 8.25 seconds, training accuracy 100.000%
step 1200, elapsed time 8.95 seconds, training 

step 12200, elapsed time 87.82 seconds, training accuracy 100.000%
step 12300, elapsed time 88.53 seconds, training accuracy 100.000%
step 12400, elapsed time 89.24 seconds, training accuracy 100.000%
step 12500, elapsed time 89.95 seconds, training accuracy 100.000%
step 12600, elapsed time 90.65 seconds, training accuracy 100.000%
step 12700, elapsed time 91.37 seconds, training accuracy 100.000%
step 12800, elapsed time 92.08 seconds, training accuracy 100.000%
step 12900, elapsed time 92.79 seconds, training accuracy 100.000%
step 13000, elapsed time 93.50 seconds, training accuracy 100.000%
step 13100, elapsed time 94.21 seconds, training accuracy 100.000%
step 13200, elapsed time 95.00 seconds, training accuracy 100.000%
step 13300, elapsed time 95.71 seconds, training accuracy 100.000%
step 13400, elapsed time 96.43 seconds, training accuracy 100.000%
step 13500, elapsed time 97.14 seconds, training accuracy 100.000%
step 13600, elapsed time 97.85 seconds, training accuracy 100.

step 0, elapsed time 0.84 seconds, training accuracy 14.000%
step 100, elapsed time 1.58 seconds, training accuracy 92.000%
step 200, elapsed time 2.30 seconds, training accuracy 94.000%
step 300, elapsed time 3.02 seconds, training accuracy 94.000%
step 400, elapsed time 3.74 seconds, training accuracy 92.000%
step 500, elapsed time 4.46 seconds, training accuracy 94.000%
step 600, elapsed time 5.17 seconds, training accuracy 98.000%
step 700, elapsed time 5.90 seconds, training accuracy 98.000%
Total training time for 800 batches: 6.61 seconds


ResourceExhaustedError: OOM when allocating tensor with shape[10000,32,28,28]
	 [[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](x_image, Variable/read)]]
	 [[Node: Mean_1/_27 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_79_Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'Conv2D', defined at:
  File "F:\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "F:\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "F:\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "F:\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "F:\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "F:\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "F:\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "F:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "F:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "F:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "F:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "F:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "F:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "F:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "F:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "F:\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "F:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "F:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "F:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "F:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-6860197c073b>", line 5, in <module>
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) # convolution
  File "<ipython-input-2-d0abcb5194e7>", line 10, in conv2d
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
  File "F:\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 630, in conv2d
    data_format=data_format, name=name)
  File "F:\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "F:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "F:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[10000,32,28,28]
	 [[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](x_image, Variable/read)]]
	 [[Node: Mean_1/_27 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_79_Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


step 0, elapsed time 0.01 seconds, training accuracy 20.000%
step 100, elapsed time 0.73 seconds, training accuracy 84.000%
step 200, elapsed time 1.44 seconds, training accuracy 78.000%
step 300, elapsed time 2.28 seconds, training accuracy 90.000%
step 400, elapsed time 2.98 seconds, training accuracy 90.000%
step 500, elapsed time 3.69 seconds, training accuracy 92.000%
step 600, elapsed time 4.40 seconds, training accuracy 96.000%
step 700, elapsed time 5.10 seconds, training accuracy 94.000%
step 800, elapsed time 5.81 seconds, training accuracy 98.000%
step 900, elapsed time 6.51 seconds, training accuracy 92.000%
step 1000, elapsed time 7.21 seconds, training accuracy 98.000%
step 1100, elapsed time 7.92 seconds, training accuracy 96.000%
step 1200, elapsed time 8.62 seconds, training accuracy 96.000%
step 1300, elapsed time 9.34 seconds, training accuracy 100.000%
step 1400, elapsed time 10.13 seconds, training accuracy 96.000%
step 1500, elapsed time 10.87 seconds, training ac