In [None]:
"""Contains a variant of the densenet model definition."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

# Short alias for the TF-Slim layers library used throughout this file.
slim = tf.contrib.slim

# Module-level class count. `densenet` below declares its own `num_classes`
# parameter, which shadows this name inside that function.
num_classes=200
def trunc_normal(stddev):
    """Build a truncated-normal initializer.

    Args:
      stddev: value forwarded as the `stddev` keyword of
        `tf.truncated_normal_initializer`.

    Returns:
      The initializer object created by `tf.truncated_normal_initializer`.
    """
    initializer = tf.truncated_normal_initializer(stddev=stddev)
    return initializer


def bn_act_conv_drp(current, num_outputs, kernel_size, scope='block'):
    """Apply batch norm -> ReLU -> convolution -> dropout, in that order.

    Args:
      current: input tensor fed to the batch-norm layer.
      num_outputs: number of output filters of the convolution.
      kernel_size: kernel size forwarded to `slim.conv2d`.
      scope: name prefix; the layers are scoped `<scope>_bn`, `<scope>_conv`
        and `<scope>_dropout`.

    Returns:
      The tensor produced by the dropout layer.
    """
    bn_scope, conv_scope, dropout_scope = [
        scope + suffix for suffix in ('_bn', '_conv', '_dropout')]

    normalized = slim.batch_norm(current, scope=bn_scope)
    activated = tf.nn.relu(normalized)
    convolved = slim.conv2d(activated, num_outputs, kernel_size,
                            scope=conv_scope)
    return slim.dropout(convolved, scope=dropout_scope)


def block(net, layers, growth, scope='block'):
    """Stack `layers` densely connected layers on top of `net`.

    Each iteration runs a 1x1 bottleneck (`4 * growth` filters) followed by a
    3x3 convolution (`growth` filters), both through `bn_act_conv_drp`, and
    concatenates the 3x3 output onto the running tensor along axis 3. The
    result therefore carries `layers * growth` more channels than the input.

    Args:
      net: input tensor; axis 3 is used as the concatenation axis.
      layers: number of bottleneck + 3x3 pairs to add.
      growth: number of filters each 3x3 convolution contributes.
      scope: name prefix for the layers created here.

    Returns:
      The input tensor with every layer's output concatenated onto it.
    """
    for layer_index in range(layers):
        suffix = str(layer_index)
        squeezed = bn_act_conv_drp(
            net, 4 * growth, [1, 1], scope=scope + '_conv1x1' + suffix)
        new_features = bn_act_conv_drp(
            squeezed, growth, [3, 3], scope=scope + '_conv3x3' + suffix)
        # Dense connectivity: later layers see every earlier feature map.
        net = tf.concat(axis=3, values=[net, new_features])
    return net


def densenet(inputs, num_classes=200, is_training=True,
             dropout_keep_prob=0.8,
             scope='densenet'):
    """Creates a variant of the densenet model.

    Layout: a 7x7 stem convolution, three dense blocks of 12 layers each
    (growth rate 12), two transition stages between them (1x1 compression to
    half the channels followed by 2x2 average pooling with stride 2), a final
    batch norm + ReLU, and a classifier head made of a 1x1 convolution,
    a flatten and a fully connected layer.

    Args:
      inputs: A batch of `Tensors` of size [batch_size, height, width, channels].
        The channel dimension must be statically known, because the
        compression width is computed from it with `int(...)`.
      num_classes: the number of classes in the dataset.
      is_training: specifies whether or not we're currently training the model.
        It is forwarded to `bn_drp_scope`, which configures the batch-norm
        and dropout layers.
      dropout_keep_prob: the percentage of activation values that are retained.
      scope: Optional variable_scope.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, `num_classes`]
      end_points: a dictionary from components of the network to the corresponding
        activation.
    """
    growth = 12
    compression_rate = 0.5
    layers_per_block = 12

    def reduce_dim(input_feature):
        # Channel count after compression, truncated towards zero.
        return int(int(input_feature.shape[-1]) * compression_rate)

    end_points = {}

    with tf.variable_scope(scope, 'DenseNet', [inputs, num_classes]):
        with slim.arg_scope(bn_drp_scope(is_training=is_training,
                                         keep_prob=dropout_keep_prob)):
            # Stem: 2 * growth = 24 output channels.
            net = slim.conv2d(inputs, 2 * growth, [7, 7], scope='conv1')
            end_points['conv1'] = net

            # Dense block + transition, twice. Channel counts:
            #   stage 1: 24 -> 168 (block1) -> 84 (compress1)
            #   stage 2: 84 -> 228 (block2) -> 114 (compress2)
            # Each avgpool keeps the channel count and pools 2x2, stride 2.
            for stage in ('1', '2'):
                block_name = 'block' + stage
                net = block(net, layers_per_block, growth, scope=block_name)
                end_points[block_name] = net

                compress_name = 'compress' + stage
                net = bn_act_conv_drp(net, reduce_dim(net), [1, 1],
                                      scope=compress_name)
                end_points[compress_name] = net

                pool_name = 'avgpool' + stage
                net = slim.avg_pool2d(net, [2, 2], stride=2, scope=pool_name)
                end_points[pool_name] = net

            # Last dense block, no transition afterwards: 114 -> 258 channels.
            net = block(net, layers_per_block, growth, scope='block3')
            end_points['block3'] = net

            # Scope name (including its spelling) is kept so that variable
            # names stay unchanged.
            net = slim.batch_norm(net, scope='last_batch_nom_act')
            net = tf.nn.relu(net)
            end_points['last_batch_nom_act'] = net

            # Classifier head.
            # NOTE(review): this flattens the full spatial map of a 1x1 conv
            # instead of using the global average pooling of the reference
            # DenseNet; left as-is so the variable layout does not change.
            biases_initializer = tf.constant_initializer(0.1)
            net = slim.conv2d(net, num_classes, [1, 1],
                              biases_initializer=biases_initializer)
            net = slim.flatten(net, scope='PreLogitsFlatten')
            logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                          scope='Logits')
            end_points['Logits'] = logits
            end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')

    return logits, end_points


def bn_drp_scope(is_training=True, keep_prob=0.8):
    """Build the default argument scope for batch-norm and dropout layers.

    Args:
      is_training: forwarded as `is_training` to both `slim.batch_norm` and
        `slim.dropout`.
      keep_prob: dropout keep probability; replaced by 1 when `is_training`
        is falsy.

    Returns:
      The scope object yielded by the dropout `arg_scope`, which is entered
      while the batch-norm `arg_scope` is active.
    """
    if not is_training:
        keep_prob = 1

    batch_norm_defaults = dict(
        scale=True, is_training=is_training, updates_collections=None)
    dropout_defaults = dict(is_training=is_training, keep_prob=keep_prob)

    with slim.arg_scope([slim.batch_norm], **batch_norm_defaults), \
            slim.arg_scope([slim.dropout], **dropout_defaults) as merged_scope:
        return merged_scope


def densenet_arg_scope(weight_decay=0.0004):
    """Defines the default densenet argument scope.

    The scope sets defaults for `slim.conv2d` only: variance-scaling weight
    initialization, L2 weight regularization, no activation function, no
    biases, 'same' padding and stride 1.

    Args:
      weight_decay: The weight decay to use for regularizing the model. It is
        used as the scale of the L2 regularizer on the convolution weights.

    Returns:
      An `arg_scope` to use for the densenet model.
    """
    conv_initializer = tf.contrib.layers.variance_scaling_initializer(
        factor=2.0, mode='FAN_IN', uniform=False)
    with slim.arg_scope(
            [slim.conv2d],
            weights_initializer=conv_initializer,
            # `weight_decay` used to be accepted but silently ignored; wire it
            # to the convolution weights so the parameter takes effect.
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=None, biases_initializer=None, padding='same',
            stride=1) as sc:
        return sc


# Attach the default input resolution to the function object itself.
# NOTE(review): presumably read by the model factory / preprocessing code to
# choose the input image size — confirm against the caller.
densenet.default_image_size = 112


learning_rate=0.1,batch_size=16,max_num_batches=64
2018-05-23 14:16:30.790275: I tensorflow/core/kernels/logging_ops.cc:79] eval/Accuracy[0.0048828125]
2018-05-23 14:16:30.790483: I tensorflow/core/kernels/logging_ops.cc:79] eval/Recall_5[0.028320312]

学习率设置成0.1，loss直接跑飞了，后面调小学习率，loss在4到5之间，训练了3120步时开始跑第一个验证，准确率比较低

INFO:tensorflow:global step 3060: loss = 4.6483 (1.326 sec/step)
learning_rate=0.001,batch_size=16,max_num_batches=128
2018-05-23 16:05:17.952950: I tensorflow/core/kernels/logging_ops.cc:79] eval/Accuracy[0.029296875]
2018-05-23 16:05:17.953034: I tensorflow/core/kernels/logging_ops.cc:79] eval/Recall_5[0.091308594]

**对densenet的理解：**

* DenseNet提出是为了更好地解决vanishing-gradient（梯度消失）问题，网络整体结构也一定程度上减少了参数数量

* 网络结构最核心的是block内部以“全连接”（密集连接，dense connectivity）的方式传递特征：$x_l = H_l([x_0, x_1, \ldots, x_{l-1}])$，即在channel维度上将前面所有层的输出拼接起来作为当前层的输入，充分利用了每一层的信息来缓解梯度消失问题

* 网络整体结构是3到4个block，每个block内部特征图的长宽保持不变，这样才能在channel维度上拼接；block内部各层之间循环密集连接，block之间通过transition层连接

* 为了避免网络过大而导致过拟合，在block内部通过1*1卷积（bottleneck）降维，在block之间通过compress降维


**对growth的理解：**

* growth就是论文中提到的参数k，是bn_act_conv_drp这组操作中3*3卷积的卷积核数量，一个block增加的channel数就是 layers*growth，layers是循环次数
* growth一般不用取很大，在block层，深度会很快增加
* 在进入block层之前的卷积数，作者建议设置为2*growth
* bottleneck层的卷积数，作者建议设置为4*growth
* growth是一个超参，需要调节

