# MNIST by Convolution NN (LeNet-5)
* 2 convolution layers (5x5 kernels, 32 and 64 filters), each followed by 2x2 max pooling (strides=2), then FC(1024) and FC(10)
* input = [batch, height, width, channels]
* Convolution kernel (w) = [height, width, channels_in, channels_out]
* strides = [1,step,step,1]
* new_height = new_width = ceil((input - kernel + 1) / strides) with 'VALID' padding; with 'SAME' padding (used below) it is ceil(input / strides)

In [20]:
#import data and library
import numpy as np
import pandas as pd 
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Remember the current TensorFlow log level and raise it to ERROR while the
# dataset is being read; the saved level is restored at the end of the cell.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

# Read the MNIST splits from MNIST_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Image arrays of the train / test splits.
train_data, test_data = mnist.train.images, mnist.test.images
# Matching label arrays, converted to int32.
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
test_labels = np.asarray(mnist.test.labels, dtype=np.int32)

tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [21]:
# Helper functions: variable creation, convolution and max pooling


def weight_variable(name, shape):
    """Return a float32 variable `name` of the given `shape`.

    The variable is obtained through tf.get_variable and uses the Xavier
    initializer from tf.contrib.layers.
    """
    return tf.get_variable(
        name,
        shape,
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer(),
    )

def bias_variable(name, shape):
    """Return a float32 variable `name` of the given `shape`, initialized to 0.0.

    The variable is obtained through tf.get_variable.
    """
    return tf.get_variable(
        name,
        shape,
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
    )

def conv2d(inputs, w):
    """Convolve `inputs` with kernel `w` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(inputs, w, strides=unit_strides, padding='SAME')

def max_pooling_2x2(inputs, x=2, y=2):
    """Max-pool `inputs` with an x-by-y window, stride 2 and 'SAME' padding.

    Only the window size follows `x` and `y`; the strides are always [1, 2, 2, 1].
    """
    return tf.nn.max_pool(
        inputs,
        [1, x, y, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
    )

In [22]:
# Problem dimensions
inputs_num = 784   # one flattened 28*28 image
outputs_num = 10   # digit classes 0 to 9

# Start from an empty default graph
tf.reset_default_graph()

# Placeholders for a batch of flattened images and their target vectors
inputs = tf.placeholder(tf.float32, [None, inputs_num])
targets = tf.placeholder(tf.float32, [None, outputs_num])

# Reshape the flat images to 4D: [batch, in_height, in_width, in_channels]
inputs_image = tf.reshape(inputs, [-1, 28, 28, 1])

# Parameters of the 1st convolution layer: 5x5 kernels, 1 -> 32 channels
w_conv1 = weight_variable('w_1', [5, 5, 1, 32])
b_conv1 = bias_variable('b_1', [32])
# Parameters of the 2nd convolution layer: 5x5 kernels, 32 -> 64 channels
w_conv2 = weight_variable('w_2', [5, 5, 32, 64])
b_conv2 = bias_variable('b_2', [64])

# Two rounds of convolution + ReLU + max pooling; each pooling halves the
# spatial size (28 -> 14 -> 7)
h_conv1 = tf.nn.relu(conv2d(inputs_image, w_conv1) + b_conv1)
h_pool1 = max_pooling_2x2(h_conv1)

h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pooling_2x2(h_conv2)

# Parameters of the 1st fully connected layer: 7*7*64 features -> 1024 neurons
fc_w1 = weight_variable('fc_w1', [7 * 7 * 64, 1024])
fc_b1 = bias_variable('fc_b1', [1024])

# Flatten the pooled feature maps to one row per example
h_pool2_flatten = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

# Fully connected layer 1 with ReLU
fc_1 = tf.nn.relu(tf.matmul(h_pool2_flatten, fc_w1) + fc_b1)

# Dropout; the keep probability is fed at run time (it should be turned off
# when testing)
keep_prop = tf.placeholder(tf.float32)
fc_1_drop = tf.nn.dropout(fc_1, keep_prop)

# Parameters of the 2nd fully connected layer: 1024 features -> 10 neurons
fc_w2 = weight_variable('fc_w2', [1024, 10])
fc_b2 = bias_variable('fc_b2', [10])

# Unnormalized class scores (logits) of the last fully connected layer
logits = tf.matmul(fc_1_drop, fc_w2) + fc_b2

# Predicted class probabilities
outputs = tf.nn.softmax(logits)

# Loss: softmax_cross_entropy_with_logits_v2 applies softmax internally, so it
# must receive the raw logits. Passing the already softmax-ed `outputs` (as
# before) applies softmax twice and flattens the gradients.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=targets))

optimizer = tf.train.AdamOptimizer(learning_rate=0.003).minimize(loss)

# A prediction is correct when the most probable class equals the class with
# the largest target entry
out_eql_target = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))

# Accuracy = mean of the correctness flags (cast from bool to float32 first)
accuracy = tf.reduce_mean(tf.cast(out_eql_target, tf.float32))

# Graph construction is complete

# Open an interactive session
sess = tf.InteractiveSession()

# Build the op that initializes all global variables, then run it
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Training parameters
epochs = 20
batch_num = 500  # mini-batch size (was written "5 00", which is a syntax error)
batch_cycle = mnist.train.num_examples // batch_num  # mini-batches per epoch
prev_validation = 9999999.  # validation loss of the previous epoch (large start value)

# Dropout keep probabilities: dropout is active only while training. For
# validation and testing the keep probability is 1.0 so that the reported
# loss/accuracy are deterministic and not degraded by dropped units.
train_keep_prob = 0.9
eval_keep_prob = 1.0

# Training model
import time
start = time.time()
for epoch_counter in range(epochs):
    # One pass over the training split, accumulating the mean batch loss
    curr_epoc_loss = 0.
    for batch_counter in range(batch_cycle):
        input_batch, target_batch = mnist.train.next_batch(batch_num)
        _, batch_loss = sess.run(
            [optimizer, loss],
            feed_dict={inputs: input_batch,
                       targets: target_batch,
                       keep_prop: train_keep_prob})
        curr_epoc_loss += batch_loss
    curr_epoc_loss /= batch_cycle

    # Validation on the whole validation split, with dropout turned off
    input_batch, target_batch = mnist.validation.next_batch(
        mnist.validation.num_examples)
    val_loss, val_accuracy = sess.run(
        [loss, accuracy],
        feed_dict={inputs: input_batch,
                   targets: target_batch,
                   keep_prop: eval_keep_prob})

    # Early stopping: leave the loop as soon as the validation loss increases
    if val_loss > prev_validation:
        break
    prev_validation = val_loss
    print(f'Epoch cycle: {epoch_counter + 1}'+
          f' Training loss: {curr_epoc_loss:.3f}'+
          f' Validation loss: {val_loss:.3f}'+
          f' Validation accuracy: {val_accuracy*100.:.2f}'+'%'+
          f' times: {time.time() - start}')

    # Test accuracy on a batch of 3000 test images, with dropout turned off
    input_batch, target_batch = mnist.test.next_batch(3000)
    test_accuracy = sess.run(
        [accuracy],
        feed_dict={inputs: input_batch,
                   targets: target_batch,
                   keep_prop: eval_keep_prob})
    test_accuracy_percent = test_accuracy[0] * 100.
    print(f'test accuracy: {test_accuracy_percent:.2f}')
print('end of training')


ResourceExhaustedError: OOM when allocating tensor with shape[3136,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: fc_w1/Adam/Initializer/zeros = Fill[T=DT_FLOAT, _class=["loc:@fc_w1"], index_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](fc_w1/Adam/Initializer/zeros/shape_as_tensor, fc_w1/Adam/Initializer/zeros/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'fc_w1/Adam/Initializer/zeros', defined at:
  File "D:\Users\user\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "D:\Users\user\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "D:\Users\user\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "D:\Users\user\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "D:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "D:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 438, in run_forever
    self._run_once()
  File "D:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 1451, in _run_once
    handle._run()
  File "D:\Users\user\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "D:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "D:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "D:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "D:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "D:\Users\user\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "D:\Users\user\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "D:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "D:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "D:\Users\user\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "D:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "D:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "D:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-22-c60da40e9e00>", line 52, in <module>
    optimizer = tf.train.AdamOptimizer(learning_rate=0.003,).minimize(loss)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\optimizer.py", line 424, in minimize
    name=name)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\optimizer.py", line 600, in apply_gradients
    self._create_slots(var_list)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\adam.py", line 131, in _create_slots
    self._zeros_slot(v, "m", self._name)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\optimizer.py", line 1150, in _zeros_slot
    new_slot_variable = slot_creator.create_zeros_slot(var, op_name)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\slot_creator.py", line 181, in create_zeros_slot
    colocate_with_primary=colocate_with_primary)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\slot_creator.py", line 155, in create_slot_with_initializer
    dtype)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\slot_creator.py", line 65, in _create_slot_var
    validate_shape=validate_shape)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1317, in get_variable
    constraint=constraint)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1079, in get_variable
    constraint=constraint)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 425, in get_variable
    constraint=constraint)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 394, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 786, in _get_single_variable
    use_resource=use_resource)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2220, in variable
    use_resource=use_resource)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2210, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2193, in default_variable_creator
    constraint=constraint)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 235, in __init__
    constraint=constraint)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 343, in _init_from_args
    initial_value(), name="initial_value", dtype=dtype)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 770, in <lambda>
    shape.as_list(), dtype=dtype, partition_info=partition_info)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\init_ops.py", line 99, in __call__
    return array_ops.zeros(shape, dtype)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1626, in zeros
    output = fill(shape, constant(zero, dtype=dtype), name=name)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 2716, in fill
    "Fill", dims=dims, value=value, name=name)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3392, in create_op
    op_def=op_def)
  File "D:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[3136,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: fc_w1/Adam/Initializer/zeros = Fill[T=DT_FLOAT, _class=["loc:@fc_w1"], index_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](fc_w1/Adam/Initializer/zeros/shape_as_tensor, fc_w1/Adam/Initializer/zeros/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [None]:
# Close the interactive session
sess.close()

In [None]:
#CNN model

import numpy as np
import tensorflow as tf

def weight_variable(shape):
    """Return a float32 variable named 'weight' with the given `shape`.

    The variable is obtained through tf.get_variable and uses the Xavier
    initializer from tf.contrib.layers.
    """
    return tf.get_variable(
        'weight',
        shape,
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer(),
    )

def bias_variable(shape):
    """Return a float32 variable named 'bias' with the given `shape`, initialized to 0.0.

    The variable is obtained through tf.get_variable.

    Fixes two typos of the earlier version: the non-existent
    `tf.constance_initializer` and the local `initialier`, which left
    `initializer` undefined inside the function.
    """
    initializer = tf.constant_initializer(0.0)
    return tf.get_variable('bias', shape, initializer=initializer, dtype=tf.float32)

def conv2d(inputs, w):
    """Apply a stride-1, 'SAME'-padded 2D convolution of `inputs` with kernel `w`."""
    return tf.nn.conv2d(
        inputs,
        w,
        strides=[1, 1, 1, 1],
        padding='SAME',
    )

def max_pooling_2x2(inputs, x=2, y=2):
    """Max-pool `inputs` over an x-by-y window with stride 2 and 'SAME' padding.

    `x` and `y` only set the window size; the strides stay [1, 2, 2, 1].
    """
    window = [1, x, y, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(inputs, window, strides=step, padding='SAME')

class MNISTcnn(object):
    """Graph of a small CNN classifier: two conv+pool layers and two FC layers.

    Layout: conv(5x5, 32) -> max pool -> conv(5x5, 64) -> max pool ->
    FC(1024) + ReLU -> dropout -> FC(10).

    Attributes:
        inputs: the input tensor given to the constructor.
        targets: the target tensor given to the constructor.
        keep_prob: float32 placeholder with the dropout keep probability.
        keep_drop: alias of `keep_prob`, kept for backward compatibility.
        loss: mean softmax cross entropy between the logits and `targets`.
        pred: index of the largest logit of each example.
        correct_prediction: whether `pred` matches the argmax of `targets`.
        accuracy: mean of `correct_prediction` cast to float32.
    """

    def __init__(self, inputs, targets, conf):
        """Build the network on top of `inputs` and `targets`.

        Args:
            inputs: 4D image tensor with one channel (the first kernel is
                5x5x1); presumably [batch, 28, 28, 1] -- TODO confirm.
            targets: label tensor compared through argmax over axis 1;
                presumably one-hot with 10 classes -- TODO confirm.
            conf: not used by the code visible here.
        """
        self.inputs = inputs
        self.targets = targets
        # Was `tf.palceholder` stored as `keep_drop` while the dropout layer
        # read `self.keep_prob`; both names now refer to the same placeholder.
        self.keep_prob = tf.placeholder(tf.float32)
        self.keep_drop = self.keep_prob

        # 1st layer (scope names must be strings)
        with tf.variable_scope("conv1"):
            W_conv1 = weight_variable([5, 5, 1, 32])
            b_conv1 = bias_variable([32])
            h_conv1 = tf.nn.relu(conv2d(self.inputs, W_conv1) + b_conv1)
            h_pool1 = max_pooling_2x2(h_conv1)
        # 2nd layer
        with tf.variable_scope("conv2"):
            W_conv2 = weight_variable([5, 5, 32, 64])
            b_conv2 = bias_variable([64])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
            h_pool2 = max_pooling_2x2(h_conv2)
        # fully connected 1st layer
        with tf.variable_scope("fc1"):
            # Number of features per example = product of all non-batch dims
            shape = int(np.prod(h_pool2.get_shape().as_list()[1:]))
            W_fc1 = weight_variable([shape, 1024])
            b_fc1 = bias_variable([1024])
            h_pool2_flat = tf.reshape(h_pool2, [-1, shape])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        # dropout
        h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        # fully connected to outputs (raw logits, no softmax here)
        with tf.variable_scope("fc2"):
            W_fc2 = weight_variable([1024, 10])
            b_fc2 = bias_variable([10])
            outputs = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

        # The cross-entropy op applies softmax itself, so it receives the logits
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.targets, logits=outputs))
        self.pred = tf.argmax(outputs, 1)

        self.correct_prediction = tf.equal(tf.argmax(outputs, 1), tf.argmax(self.targets, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))