In [1]:
import logging
logging.basicConfig(level=logging.INFO)
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
import time

In [2]:
train_path = 'data/train.record'
save_path = 'model/alexnet/alexnet.ckpt'

name_id_map = {'cat': 0, 'dog': 1}
id_name_map = {id:name for name, id in name_id_map.items()}
n_classes = len(name_id_map)
logging.info('name_id_map: %s', name_id_map)
logging.info('id_name_map: %s', id_name_map)
logging.info('n_classes: %d', n_classes)

INFO:root:name_id_map: {'dog': 1, 'cat': 0}
INFO:root:id_name_map: {0: 'cat', 1: 'dog'}
INFO:root:n_classes: 2


### input layer

In [4]:
# Input: N*227*227*3
channels0 = 3
x_ = tf.placeholder(tf.float32, [None, 227, 227, channels0])
y_ = tf.placeholder(tf.int32, [None, n_classes])
logging.info('x_.shape = %s', x_.shape.as_list())
logging.info('y_.shape = %s', y_.shape.as_list())

INFO:root:x_.shape = [None, 227, 227, 3]
INFO:root:y_.shape = [None, 2]


### 1st layer: conv1(bn) => lrn1 => pool1

In [7]:
with tf.variable_scope('layer1'):
    # C1: N*227*227*3 => C11*11s4*4V => N*55*55*96
    channels1 = 96
    weight1 = tf.Variable(tf.truncated_normal([11, 11, channels0, channels1], stddev=0.01), name='weight1')
    conv1 = tf.nn.conv2d(x_, filter=weight1, strides=[1, 4, 4, 1], padding='VALID')
    # compare with bias
    # bias1 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[channels1]))
    # conv1 = tf.nn.bias_add(conv1, bias1)
    # batch normalization
    # y = (x - batch_mean) / sqrt(batch_var + epsilon)
    # z = scale * y + offset
    scale1 = tf.Variable(tf.ones([channels1]), name='scale1')
    offset1 = tf.Variable(tf.zeros(channels1), name='offset1')
    mean1 = tf.Variable(tf.zeros([channels1]), trainable=False, name='mean1')
    var1 = tf.Variable(tf.ones([channels1]), trainable=False, name='var1')
    batch_mean1, batch_var1 = tf.nn.moments(conv1, [0, 1, 2])
    epsilon1 = 0.0001
    train_mean1 = tf.assign(mean1, mean1 * 0.999 + batch_mean1 * 0.001)
    train_var1 = tf.assign(var1, var1 * 0.999 + batch_var1 * 0.001)
    with tf.control_dependencies([train_mean1, train_var1]):
        conv1 = tf.nn.batch_normalization(conv1, batch_mean1, batch_var1, offset1, scale1, epsilon1)
    conv1 = tf.nn.relu(conv1)
    logging.info('weight1: name=%s, shape=%s', weight1.name, weight1.shape.as_list())
    logging.info('scale1: name=%s, shape=%s', scale1.name, scale1.shape.as_list())
    logging.info('offset1: name=%s, shape=%s', offset1.name, offset1.shape.as_list())
    logging.info('mean1: name=%s, shape=%s', mean1.name, mean1.shape.as_list())
    logging.info('var1: name=%s, shape=%s', var1.name, var1.shape.as_list())
    logging.info('conv1.shape: %s', conv1.shape.as_list())

    # L1: N*55*55*96 => LRN => N*55*55*96 
    # sqr_sum[a, b, c, d] = sum(input[a, b, c, d-depth_radius:d+depth_radius+1] ** 2)
    # output = input / ((bias + alpha * sqr_sum) ** beta)
    lrn1 = tf.nn.lrn(conv1, depth_radius=4, bias=1.0, alpha=0.001/9, beta=0.75)
    logging.info('lrn1.shape: %s', lrn1.shape.as_list())

    # P1: N*55*55*96 => P3*3s2*2V => N*27*27*96
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    logging.info('pool1.shape: %s', pool1.shape.as_list())


INFO:root:weight1: name=layer1_1/weight1:0, shape=[11, 11, 3, 96]
INFO:root:scale1: name=layer1_1/scale1:0, shape=[96]
INFO:root:offset1: name=layer1_1/offset1:0, shape=[96]
INFO:root:mean1: name=layer1_1/mean1:0, shape=[96]
INFO:root:var1: name=layer1_1/var1:0, shape=[96]
INFO:root:conv1.shape: [None, 55, 55, 96]
INFO:root:lrn1.shape: [None, 55, 55, 96]
INFO:root:pool1.shape: [None, 27, 27, 96]


### 2nd layer: conv2（bn） => lrn2 => pool2

In [8]:
with tf.variable_scope('layer2'):
    # C2: N*27*27*96 => C5*5s1*1S => N*27*27*256
    channels2 = 256
    weight2 = tf.Variable(tf.truncated_normal([5, 5, channels1, channels2], stddev=0.01), name='weight2')
    conv2 = tf.nn.conv2d(pool1, filter=weight2, strides=[1, 1, 1, 1], padding='SAME')
    # compare with bias
    # bias2 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[channels2]))
    # conv2 = tf.nn.bias_add(conv2, bias2)
    # batch normalization
    # y = (x - batch_mean) / sqrt(batch_var + epsilon)
    # z = scale * y + offset
    scale2 = tf.Variable(tf.ones([channels2]), name='scale2')
    offset2 = tf.Variable(tf.zeros(channels2), name='offset2')
    mean2 = tf.Variable(tf.zeros([channels2]), trainable=False, name='mean2')
    var2 = tf.Variable(tf.ones([channels2]), trainable=False, name='var2')
    batch_mean2, batch_var2 = tf.nn.moments(conv2, [0, 1, 2])
    epsilon2 = 0.0001
    train_mean2 = tf.assign(mean2, mean2 * 0.999 + batch_mean2 * 0.001)
    train_var2 = tf.assign(var2, var2 * 0.999 + batch_var2 * 0.001)
    with tf.control_dependencies([train_mean2, train_var2]):
        conv2 = tf.nn.batch_normalization(conv2,             batch_mean2, batch_var2, offset2, scale2, epsilon2)
    conv2 = tf.nn.relu(conv2)
    logging.info('weight2: name=%s, shape=%s', weight2.name, weight2.shape.as_list())
    logging.info('scale2: name=%s, shape=%s', scale2.name, scale2.shape.as_list())
    logging.info('offset2: name=%s, shape=%s', offset2.name, offset2.shape.as_list())
    logging.info('mean2: name=%s, shape=%s', mean2.name, mean2.shape.as_list())
    logging.info('var2: name=%s, shape=%s', var2.name, var2.shape.as_list())
    logging.info('conv2.shape: %s', conv2.shape.as_list())

    # L2: N*27*27*256 => LRN => N*27*27*256 
    # sqr_sum[a, b, c, d] = sum(input[a, b, c, d-depth_radius:d + depth_radius+1] ** 2)
    # output = input / ((bias + alpha * sqr_sum) ** beta)
    lrn2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001/9, beta=0.75)
    logging.info('lrn2.shape: %s', lrn2.shape.as_list())

    # P2: N*27*27*256 => P3*3s2*2V => N*13*13*256
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    logging.info('pool2.shape: %s', pool2.shape.as_list())

INFO:root:weight2: name=layer2/weight2:0, shape=[5, 5, 96, 256]
INFO:root:scale2: name=layer2/scale2:0, shape=[256]
INFO:root:offset2: name=layer2/offset2:0, shape=[256]
INFO:root:mean2: name=layer2/mean2:0, shape=[256]
INFO:root:var2: name=layer2/var2:0, shape=[256]
INFO:root:conv2.shape: [None, 27, 27, 256]
INFO:root:lrn2.shape: [None, 27, 27, 256]
INFO:root:pool2.shape: [None, 13, 13, 256]


### 3rd layer: conv3(bn)

In [9]:
with tf.variable_scope('layer3'):
    # C3: N*13*13*256 => C3*3s1*1S => N*13*13*384
    channels3 = 384
    weight3 = tf.Variable(tf.truncated_normal([3, 3, channels2, channels3], stddev=0.01), name='weight3')
    conv3 = tf.nn.conv2d(pool2, filter=weight3, strides=[1, 1, 1, 1], padding='SAME')
    # compare with bias
    # bias3 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[channels3]))
    # conv3 = tf.nn.bias_add(conv3, bias3)
    # batch normalization
    # y = (x - batch_mean) / sqrt(batch_var + epsilon)
    # z = scale * y + offset
    scale3 = tf.Variable(tf.ones([channels3]), name='scale3')
    offset3 = tf.Variable(tf.zeros(channels3), name='offset3')
    mean3 = tf.Variable(tf.zeros([channels3]), trainable=False, name='mean3')
    var3 = tf.Variable(tf.ones([channels3]), trainable=False, name='var3')
    batch_mean3, batch_var3 = tf.nn.moments(conv3, [0, 1, 2])
    epsilon3 = 0.0001
    train_mean3 = tf.assign(mean3, mean3 * 0.999 + batch_mean3 * 0.001)
    train_var3 = tf.assign(var3, var3 * 0.999 + batch_var3 * 0.001)
    with tf.control_dependencies([train_mean3, train_var3]):
        conv3 = tf.nn.batch_normalization(conv3, batch_mean3, batch_var3, offset3, scale3, epsilon3)
    conv3 = tf.nn.relu(conv3)
    logging.info('weight3: name=%s, shape=%s', weight3.name, weight3.shape.as_list())
    logging.info('scale3: name=%s, shape=%s', scale3.name, scale3.shape.as_list())
    logging.info('offset3: name=%s, shape=%s', offset3.name, offset3.shape.as_list())
    logging.info('mean3: name=%s, shape=%s', mean3.name, mean3.shape.as_list())
    logging.info('var3: name=%s, shape=%s', var3.name, var3.shape.as_list())
    logging.info('conv3.shape: %s', conv3.shape.as_list())

INFO:root:weight3: name=layer3/weight3:0, shape=[3, 3, 256, 384]
INFO:root:scale3: name=layer3/scale3:0, shape=[384]
INFO:root:offset3: name=layer3/offset3:0, shape=[384]
INFO:root:mean3: name=layer3/mean3:0, shape=[384]
INFO:root:var3: name=layer3/var3:0, shape=[384]
INFO:root:conv3.shape: [None, 13, 13, 384]


### 4th layer: conv4(bn)

In [10]:
with tf.variable_scope('layer4'):
    # C4: N*13*13*384 => C3*3s1*1S => N*13*13*384
    channels4 = 384
    weight4 = tf.Variable(tf.truncated_normal([3, 3, channels3, channels4], stddev=0.01), name='weight4')
    conv4 = tf.nn.conv2d(conv3, filter=weight4, strides=[1, 1, 1, 1], padding='SAME')
    # compare with bias
    # bias4 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[channels4]))
    # conv4 = tf.nn.bias_add(conv4, bias4)
    # batch normalization
    # y = (x - batch_mean) / sqrt(batch_var + epsilon)
    # z = scale * y + offset
    scale4 = tf.Variable(tf.ones([channels4]), name='scale4')
    offset4 = tf.Variable(tf.zeros(channels4), name='offset4')
    mean4 = tf.Variable(tf.zeros([channels4]), trainable=False, name='mean4')
    var4 = tf.Variable(tf.ones([channels4]), trainable=False, name='var4')
    batch_mean4, batch_var4 = tf.nn.moments(conv4, [0, 1, 2])
    epsilon4 = 0.0001
    train_mean4 = tf.assign(mean4, mean4 * 0.999 + batch_mean4 * 0.001)
    train_var4 = tf.assign(var4, var4 * 0.999 + batch_var4 * 0.001)
    with tf.control_dependencies([train_mean4, train_var4]):
        conv4 = tf.nn.batch_normalization(conv4, batch_mean4, batch_var4, offset4, scale4, epsilon4)
    conv4 = tf.nn.relu(conv4)
    logging.info('weight4: name=%s, shape=%s', weight4.name, weight4.shape.as_list())
    logging.info('scale4: name=%s, shape=%s', scale4.name, scale4.shape.as_list())
    logging.info('offset4: name=%s, shape=%s', offset4.name, offset4.shape.as_list())
    logging.info('mean4: name=%s, shape=%s', mean4.name, mean4.shape.as_list())
    logging.info('var4: name=%s, shape=%s', var4.name, var4.shape.as_list())
    logging.info('conv4.shape: %s', conv4.shape.as_list())

INFO:root:weight4: name=layer4/weight4:0, shape=[3, 3, 384, 384]
INFO:root:scale4: name=layer4/scale4:0, shape=[384]
INFO:root:offset4: name=layer4/offset4:0, shape=[384]
INFO:root:mean4: name=layer4/mean4:0, shape=[384]
INFO:root:var4: name=layer4/var4:0, shape=[384]
INFO:root:conv4.shape: [None, 13, 13, 384]


### 5th layer: conv5(bn) => pool5 => reshape5

In [12]:
with tf.variable_scope('layer5'):
    # C5: N*13*13*384 => C3*3s1*1S => N*13*13*256
    channels5 = 256
    weight5 = tf.Variable(tf.truncated_normal([3, 3, channels4, channels5], stddev=0.01), name='weight5')
    conv5 = tf.nn.conv2d(conv4, filter=weight5, strides=[1, 1, 1, 1], padding='SAME')
    # compare with bias
    # bias5 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[channels5]))
    # conv5 = tf.nn.bias_add(conv5, bias5)
    # batch normalization
    # y = (x - batch_mean) / sqrt(batch_var + epsilon)
    # z = scale * y + offset
    scale5 = tf.Variable(tf.ones([channels5]), name='scale5')
    offset5 = tf.Variable(tf.zeros(channels5), name='offset5')
    mean5 = tf.Variable(tf.zeros([channels5]), trainable=False, name='mean5')
    var5 = tf.Variable(tf.ones([channels5]), trainable=False, name='var5')
    batch_mean5, batch_var5 = tf.nn.moments(conv5, [0, 1, 2])
    epsilon5 = 0.0001
    train_mean5 = tf.assign(mean5, mean5 * 0.999 + batch_mean5 * 0.001)
    train_var5 = tf.assign(var5, var5 * 0.999 + batch_var5 * 0.001)
    with tf.control_dependencies([train_mean5, train_var5]):
        conv5 = tf.nn.batch_normalization(conv5, batch_mean5, batch_var5, offset5, scale5, epsilon5)
    conv5 = tf.nn.relu(conv5)
    logging.info('weight5: name=%s, shape=%s', weight5.name, weight5.shape.as_list())
    logging.info('scale5: name=%s, shape=%s', scale5.name, scale5.shape.as_list())
    logging.info('offset5: name=%s, shape=%s', offset5.name, offset5.shape.as_list())
    logging.info('mean5: name=%s, shape=%s', mean5.name, mean5.shape.as_list())
    logging.info('var5: name=%s, shape=%s', var5.name, var5.shape.as_list())
    logging.info('conv5.shape: %s', conv5.shape.as_list())

    # P5: N*13*13*256 => P3*3s2*2V => N*6*6*256
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    logging.info('pool5.shape: %s', pool5.shape.as_list())

    # R5: N*6*6*256 => Reshape => N*9216
    nodes5 = 6 * 6 * 256
    reshape5 = tf.reshape(pool5, [-1, nodes5])
    logging.info('reshape5.shape: %s', reshape5.shape.as_list())

INFO:root:weight5: name=layer5/weight5:0, shape=[3, 3, 384, 256]
INFO:root:scale5: name=layer5/scale5:0, shape=[256]
INFO:root:offset5: name=layer5/offset5:0, shape=[256]
INFO:root:mean5: name=layer5/mean5:0, shape=[256]
INFO:root:var5: name=layer5/var5:0, shape=[256]
INFO:root:conv5.shape: [None, 13, 13, 256]
INFO:root:pool5.shape: [None, 6, 6, 256]
INFO:root:reshape5.shape: [None, 9216]


### 6th layer: full6(bn) => drop6

In [13]:
with tf.variable_scope('layer6'):
    # F6: N*9216 => N*4096
    nodes6 = 4096
    weight6 = tf.Variable(tf.truncated_normal([nodes5, nodes6], stddev=0.1), name='weight6')
    full6 = tf.matmul(reshape5, weight6)
    # compare with bias
    # bias6 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[nodes6]))
    # full6 = tf.nn.bias_add(full6, bias6)
    # batch normalization
    # y = (x - batch_mean) / sqrt(batch_var + epsilon)
    # z = scale * y + offset
    scale6 = tf.Variable(tf.ones([nodes6]), name='scale6')
    offset6 = tf.Variable(tf.zeros(nodes6), name='offset6')
    mean6 = tf.Variable(tf.zeros([nodes6]), trainable=False, name='mean6')
    var6 = tf.Variable(tf.ones([nodes6]), trainable=False, name='var6')
    batch_mean6, batch_var6 = tf.nn.moments(full6, [0])
    epsilon6 = 0.0001
    train_mean6 = tf.assign(mean6, mean6 * 0.999 + batch_mean6 * 0.001)
    train_var6 = tf.assign(var6, var6 * 0.999 + batch_var6 * 0.001)
    with tf.control_dependencies([train_mean6, train_var6]):
        full6 = tf.nn.batch_normalization(full6, batch_mean6, batch_var6, offset6, scale6, epsilon6)
    full6 = tf.nn.relu(full6)
    logging.info('weight6: name=%s, shape=%s', weight6.name, weight6.shape.as_list())
    logging.info('scale6: name=%s, shape=%s', scale6.name, scale6.shape.as_list())
    logging.info('offset6: name=%s, shape=%s', offset6.name, offset6.shape.as_list())
    logging.info('mean6: name=%s, shape=%s', mean6.name, mean6.shape.as_list())
    logging.info('var6: name=%s, shape=%s', var6.name, var6.shape.as_list())
    logging.info('full6.shape: %s', full6.shape.as_list())

    # D6: N*4096 => N*4096
    # the dropped elements become 0, and the kept elements scaled 
    # up by 1/keep_prob, which will unchange the expected sum.
    keep_prob6 = 0.5
    drop6 = tf.nn.dropout(full6, keep_prob6)
    logging.info('drop6.shape: %s', drop6.shape.as_list())

INFO:root:weight6: name=layer6/weight6:0, shape=[9216, 4096]
INFO:root:scale6: name=layer6/scale6:0, shape=[4096]
INFO:root:offset6: name=layer6/offset6:0, shape=[4096]
INFO:root:mean6: name=layer6/mean6:0, shape=[4096]
INFO:root:var6: name=layer6/var6:0, shape=[4096]
INFO:root:full6.shape: [None, 4096]
INFO:root:drop6.shape: [None, 4096]


### 7th layer: full7(bn) => drop7

In [14]:
with tf.variable_scope('layer7'):
    # F7: N*4096 => N*2048
    nodes7 = 2048
    weight7 = tf.Variable(tf.truncated_normal([nodes6, nodes7], stddev=0.1), name='weight7')
    full7 = tf.matmul(drop6, weight7)
    # compare with bias
    # bias7 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[nodes7]))
    # full7 = tf.nn.bias_add(full7, bias7)
    # batch normalization
    # y = (x - batch_mean) / sqrt(batch_var + epsilon)
    # z = scale * y + offset
    scale7 = tf.Variable(tf.ones([nodes7]), name='scale7')
    offset7 = tf.Variable(tf.zeros(nodes7), name='offset7')
    mean7 = tf.Variable(tf.zeros([nodes7]), trainable=False, name='mean7')
    var7 = tf.Variable(tf.ones([nodes7]), trainable=False, name='var7')
    batch_mean7, batch_var7 = tf.nn.moments(full7, [0])
    epsilon7 = 0.0001
    train_mean7 = tf.assign(mean7, mean7 * 0.999 + batch_mean7 * 0.001)
    train_var7 = tf.assign(var7, var7 * 0.999 + batch_var7 * 0.001)
    with tf.control_dependencies([train_mean7, train_var7]):
        full7 = tf.nn.batch_normalization(full7, batch_mean7, batch_var7, offset7, scale7, epsilon7)
    full7 = tf.nn.relu(full7)
    logging.info('weight7: name=%s, shape=%s', weight7.name, weight7.shape.as_list())
    logging.info('scale7: name=%s, shape=%s', scale7.name, scale7.shape.as_list())
    logging.info('offset7: name=%s, shape=%s', offset7.name, offset7.shape.as_list())
    logging.info('mean7: name=%s, shape=%s', mean7.name, mean7.shape.as_list())
    logging.info('var7: name=%s, shape=%s', var7.name, var7.shape.as_list())
    logging.info('full7.shape: %s', full7.shape.as_list())

    # D7: N*2048 => N*2048
    # the dropped elements become 0, and the kept elements scaled 
    # up by 1/keep_prob, which will unchange the expected sum.
    keep_prob7 = 0.5
    drop7 = tf.nn.dropout(full7, keep_prob7)
    logging.info('drop7.shape: %s', drop7.shape.as_list())

INFO:root:weight7: name=layer7/weight7:0, shape=[4096, 2048]
INFO:root:scale7: name=layer7/scale7:0, shape=[2048]
INFO:root:offset7: name=layer7/offset7:0, shape=[2048]
INFO:root:mean7: name=layer7/mean7:0, shape=[2048]
INFO:root:var7: name=layer7/var7:0, shape=[2048]
INFO:root:full7.shape: [None, 2048]
INFO:root:drop7.shape: [None, 2048]


### 8th layer: full8(bn)

In [16]:
with tf.variable_scope('layer8'):
    # F8: N*2048 => N*2
    nodes8 = n_classes
    weight8 = tf.Variable(tf.truncated_normal([nodes7, nodes8], stddev=0.1), name='weight8')
    full8 = tf.matmul(drop7, weight8)
    # compare with bias
    bias8 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[nodes8]), name='bias8')
    full8 = tf.nn.bias_add(full8, bias8)
    logging.info('weight8: name=%s, shape=%s', bias8.name, bias8.shape.as_list())
    logging.info('bias8: name=%s, shape=%s', bias8.name, bias8.shape.as_list())
    logging.info('full8.shape: %s', full8.shape.as_list())

INFO:root:weight8: name=layer8_1/bias8:0, shape=[2]
INFO:root:bias8: name=layer8_1/bias8:0, shape=[2]
INFO:root:full8.shape: [None, 2]


### output layer: softmax

In [17]:
y = tf.nn.softmax(full8)
logging.info('y.shape: %s', y.shape.as_list())

INFO:root:y.shape: [None, 2]


### train

In [18]:
learning_rate = 1.0e-4
y_ = tf.cast(y_, tf.float32)
# y maybe 0, which will lead to cross_entropy be nan
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=1))
#eps = 1e-10
#cross_entropy = tf.reduce_mean(
#    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, eps, 1.0 - eps)), axis=1))
y_ls = tf.nn.log_softmax(full8) # log(e^k/sigma(e^i)) = k - log(sigma(e^i)) != nan
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * y_ls, axis=1))
#cross_entropy = tf.reduce_mean( \
#    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=full8))

#train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
logging.info('cross_entropy.type: %s', cross_entropy.dtype)

INFO:root:cross_entropy.type: <dtype: 'float32'>


In [19]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

batch_size = 8

reader = tf.TFRecordReader()  # no need to close
filename_queue = tf.train.string_input_producer([train_path])
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
    serialized_example,
    features = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
)
image = tf.decode_raw(features['image_raw'], tf.uint8)
image = tf.cast(tf.reshape(image, [227, 227, 3]), tf.float32)
label = features['label']
logging.info('image.shape: %s', image.shape.as_list())
logging.info('label.shape: %s', label.shape.as_list())

num_threads = 4
min_after_dequeue = num_threads * batch_size
capacity = 3 * num_threads * batch_size # 32: min_after_dequeue, 64: extra
image_batch, label_batch = tf.train.shuffle_batch([image, label], batch_size = batch_size, \
            capacity = capacity, min_after_dequeue = min_after_dequeue, num_threads = num_threads)
logging.info('image_batch: dtype = %s, shape = %s', image_batch.dtype, image_batch.shape.as_list())
logging.info('label_batch: dtype = %s. shape = %s', label_batch.dtype, label_batch.shape.as_list())

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess = sess, coord = coord)
            
saver = tf.train.Saver()

losses = []

epoches = 1000000
onehot_code = [[1.0, 0.0], [0.0, 1.0]]
logging.info('training begin: epoches = %d', epoches)
t0 = time.time()
for epoch in range(1, epoches+1):
    images, labels = sess.run([image_batch, label_batch])
    labels = np.array([onehot_code[lbl] for lbl in labels])
    train_step.run({x_:images, y_:labels})
    t = time.time() - t0
    loss = cross_entropy.eval({x_:images, y_:labels})
    losses.append(loss)
    if epoch % 100 == 0:
        t = time.time() - t0
        t0 = time.time()
        logging.info('[epoch %6d] loss = %.6f (%.2f seconds/100 epoches)', epoch, loss, t)
    if epoch % 100000 == 0:
        logging.info('saving model to %s', save_path)
        saver.save(sess, save_path, global_step=epoch)

logging.info('traing finished: total = %d', epoch)
logging.info('save model to %s', save_path)
saver.save(sess, save_path, global_step=epoch)

coord.request_stop()
coord.join(threads)

plt.figure()
plt.plot(losses)
plt.tight_layout()
ple.show()

sess.close()


ResourceExhaustedError: OOM when allocating tensor with shape[4096,2048] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: layer7/weight7/Adam/Initializer/zeros = Fill[T=DT_FLOAT, _class=["loc:@layer7/weight7"], index_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](layer7/weight7/Adam/Initializer/zeros/shape_as_tensor, layer7/weight7/Adam/Initializer/zeros/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op u'layer7/weight7/Adam/Initializer/zeros', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 1064, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2705, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2809, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2869, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-18-92d6678d063b>", line 14, in <module>
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 409, in minimize
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 552, in apply_gradients
    self._create_slots([_get_variable_for(v) for v in var_list])
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/adam.py", line 131, in _create_slots
    self._zeros_slot(v, "m", self._name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 984, in _zeros_slot
    new_slot_variable = slot_creator.create_zeros_slot(var, op_name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/slot_creator.py", line 179, in create_zeros_slot
    colocate_with_primary=colocate_with_primary)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/slot_creator.py", line 153, in create_slot_with_initializer
    dtype)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/slot_creator.py", line 65, in _create_slot_var
    validate_shape=validate_shape)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1297, in get_variable
    constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1093, in get_variable
    constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 439, in get_variable
    constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 408, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 800, in _get_single_variable
    use_resource=use_resource)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 2157, in variable
    use_resource=use_resource)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 2147, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 2130, in default_variable_creator
    constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 235, in __init__
    constraint=constraint)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 337, in _init_from_args
    initial_value(), name="initial_value", dtype=dtype)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 784, in <lambda>
    shape.as_list(), dtype=dtype, partition_info=partition_info)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/init_ops.py", line 99, in __call__
    return array_ops.zeros(shape, dtype)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 1601, in zeros
    output = fill(shape, constant(zero, dtype=dtype), name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2583, in fill
    "Fill", dims=dims, value=value, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3290, in create_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1654, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[4096,2048] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: layer7/weight7/Adam/Initializer/zeros = Fill[T=DT_FLOAT, _class=["loc:@layer7/weight7"], index_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](layer7/weight7/Adam/Initializer/zeros/shape_as_tensor, layer7/weight7/Adam/Initializer/zeros/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

