In [1]:
import collections
import tensorflow as tf
slim = tf.contrib.slim

  from ._conv import register_converters as _register_converters


* namedtuple 语法说明 

`TPoint = collections.namedtuple('TPoint', ['x', 'y'])` 创建一个名为 TPoint 的类型，并带有属性 x 和 y。

In [2]:
class Block(collections.namedtuple('Block', ('scope', 'unit_fn', 'args'))):
    """Named tuple describing one ResNet block.

    Fields:
        scope: name used as the variable scope of the block.
        unit_fn: callable that builds one unit of the block.
        args: list with one entry per unit; each entry is unpacked by the
            caller into the arguments handed to `unit_fn`.
    """

In [3]:
# Smoke test for Block: build a few block descriptions (no unit function
# attached) and print each block's scope together with its per-unit arguments.
blocks = [
    Block(scope='block1', unit_fn=None,
          args=[(256, 64, 1)] * 2 + [(256, 64, 2)]),
    Block(scope='block2', unit_fn=None,
          args=[(512, 128, 1)] * 23 + [(512, 128, 2)]),
    Block(scope='block3', unit_fn=None,
          args=[(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
    Block(scope='block4', unit_fn=None,
          args=[(2048, 512, 1)] * 3),
]
for block in blocks:
    print(block.scope, block.args)

block1 [(256, 64, 1), (256, 64, 1), (256, 64, 2)]
block2 [(512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 2)]
block3 [(1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 1), (1024, 256, 2)

* padding 有 SAME 和 VALID 两种方式。SAME 并不是让输入和输出图片尺寸相同的意思，而是指 filter 滑动到边界、剩余长度不足 filter 大小时，做适当填充。

  自己实现的 SAME padding 功能

In [4]:
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    """Apply slim.conv2d with explicitly computed padding for strides other than 1.

    Args:
        inputs: tensor to convolve; axes 1 and 2 are the ones that get padded
            (presumably NHWC layout -- confirm against callers).
        num_outputs: number of output channels passed to slim.conv2d.
        kernel_size: integer kernel size; also determines the padding amount.
        stride: convolution stride.
        scope: optional scope forwarded to slim.conv2d.

    Returns:
        The result of slim.conv2d.
    """
    if stride != 1:
        # Pad kernel_size - 1 in total on each padded axis, putting the extra
        # element (if any) at the end, then convolve with 'VALID'.
        # Example from the original notes: a 224x224x3 input with a 7x7 kernel
        # is padded to 230x230x3; 64 filters with stride 2 give 112x112x64.
        total_padding = kernel_size - 1
        leading = total_padding // 2
        trailing = total_padding - leading
        paddings = [[0, 0], [leading, trailing], [leading, trailing], [0, 0]]
        padded = tf.pad(inputs, paddings)
        return slim.conv2d(padded, num_outputs, kernel_size,
                           stride=stride, padding='VALID', scope=scope)
    return slim.conv2d(inputs, num_outputs, kernel_size,
                       stride=stride, padding='SAME', scope=scope)

In [5]:
def subsample(inputs, factor, scope=None):
    """Subsample `inputs` by `factor` using a 1x1 max-pool with that stride.

    Args:
        inputs: tensor to subsample.
        factor: subsampling stride; 1 means "leave the input untouched".
        scope: optional scope forwarded to slim.max_pool2d.

    Returns:
        `inputs` itself when `factor` is 1, otherwise the pooled tensor.
    """
    if factor != 1:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
    # A stride of 1 needs no subsampling.
    return inputs

In [6]:
@slim.add_arg_scope
def bottlenect(inputs, depth, depth_bottleneck, stride, outputs_collections=None, scope=None):
    """Build one pre-activation bottleneck residual unit and print debug shapes.

    Args:
        inputs: input tensor; its last dimension is read as the channel count
            (rank of at least 4 is required by the last_dimension call).
        depth: number of channels of the unit's output.
        depth_bottleneck: number of channels of the first two convolutions.
        stride: stride of the middle 3x3 convolution (and of the shortcut).
        outputs_collections: collection(s) the output is registered in.
        scope: optional variable scope; defaults to 'bottleneck_v2'.

    Returns:
        The value returned by slim.utils.collect_named_outputs for
        shortcut + residual.
    """
    print('输入：',inputs.shape)
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as unit_scope:
        in_channels = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        pre_activated = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
        # Convolutions that must stay linear: no normalizer and no activation.
        linear = {'normalizer_fn': None, 'activation_fn': None}

        if depth != in_channels:
            # Channel counts differ: project the pre-activated input with a
            # strided 1x1 convolution.
            shortcut = slim.conv2d(pre_activated, depth, [1, 1], stride=stride,
                                   scope='shortcut', **linear)
        else:
            # Same channel count: only the spatial size has to follow the
            # residual path (whose middle conv is strided), so subsample the
            # raw input with the same stride.
            shortcut = subsample(inputs, stride, 'shortcut')
        print('shortcut.shape',shortcut.shape)  # e.g. 56x56x256 per the original notes

        reduced = slim.conv2d(pre_activated, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        print('residual1.shape',reduced.shape)
        spatial = conv2d_same(reduced, num_outputs=depth_bottleneck, kernel_size=3,
                              stride=stride, scope='conv2')
        print('residual2.shape',spatial.shape)
        expanded = slim.conv2d(spatial, depth, [1, 1], stride=1, scope='conv3', **linear)
        print('residual3.shape',expanded.shape)

        unit_output = shortcut + expanded
        print('output.shape',unit_output.shape)
        return slim.utils.collect_named_outputs(outputs_collections, unit_scope.name, unit_output)

In [7]:
@slim.add_arg_scope
def stack_blocks_dense(net, blocks, outputs_collections=None):
    """Stack every unit of every block on top of `net`.

    Args:
        net: input tensor fed to the first unit.
        blocks: iterable of Block tuples; each `block.args` entry is unpacked
            as (unit_depth, unit_depth_bottleneck, unit_stride) and passed to
            `block.unit_fn`.
        outputs_collections: collection(s) in which each block's final output
            is registered under the block's scope name.

    Returns:
        The output tensor of the last unit of the last block.
    """
    for block in blocks:
        print("block.scope",block.scope)
        with tf.variable_scope(block.scope,'block', [net]) as sc:
            for i, unit in enumerate(block.args):
                print("block.arg",unit)
                with tf.variable_scope('unit_%d' %(i + 1), values=[net]):
                    # Each unit is three convolutions: 1x1 with
                    # unit_depth_bottleneck channels, 3x3 with
                    # unit_depth_bottleneck channels and stride unit_stride,
                    # then a 1x1 output layer with unit_depth channels.
                    unit_depth, unit_depth_bottleneck, unit_stride = unit
                    net = block.unit_fn(net, unit_depth, unit_depth_bottleneck, unit_stride)
                    print('输出：',net.shape)
            # Register the block-level output; previously outputs_collections
            # was accepted but silently ignored, so block outputs never
            # reached the end-points collection.
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
    return net

In [8]:
def resnet_v2(inputs, blocks, num_classes=None, global_pool=True, include_root_block=True, reuse=None, scope=None):
    """Assemble a ResNet-v2 network from a list of Block descriptions.

    Args:
        inputs: input tensor.
        blocks: list of Block tuples handed to stack_blocks_dense.
        num_classes: if not None, a 1x1 'logits' convolution with this many
            outputs is appended and a softmax is stored under
            end_points['predictions'].
        global_pool: if True, average over axes 1 and 2 (keeping dims) before
            the logits layer.
        include_root_block: if True, prepend the 7x7 stride-2 convolution and
            the 3x3 stride-2 max-pool.
        reuse: forwarded to tf.variable_scope.
        scope: optional variable scope; defaults to 'resnet_v2'.

    Returns:
        (net, end_points): the final tensor and a dict built from the
        end-points collection.
    """
    with tf.variable_scope(scope,'resnet_v2', [inputs], reuse=reuse) as sc:
        end_point_collection = sc.original_name_scope + '_end_point'
        with slim.arg_scope([slim.conv2d, bottlenect, stack_blocks_dense], outputs_collections=end_point_collection):
            net = inputs
            if include_root_block:
                # Root stage. The first convolution is created without
                # activation and without normalizer.
                with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None):
                    # Per the original notes: 112x112x64 for a 224x224 input.
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                # Per the original notes: 56x56x64 for a 224x224 input.
                net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME',scope='pool1')
            # Stack all residual blocks.
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
            if global_pool:
                net = tf.reduce_mean(net, [1, 2], name='pool5',keepdims=True)
            if num_classes is not None:
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits')
            # Convert the collection only after the logits layer exists so
            # that it is part of the returned dict.
            end_points = slim.utils.convert_collection_to_dict(end_point_collection)
            if num_classes is not None:
                # Scope name fixed: it was misspelled 'predictons'.
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
        

In [9]:
def resnet_v2_50(inputs, num_classes=None, global_pool=True, reuse = None, scope='resnet_v2_50'):
    """Build the 50-layer ResNet-v2 by describing its four blocks.

    Args:
        inputs: input tensor.
        num_classes: forwarded to resnet_v2.
        global_pool: forwarded to resnet_v2.
        reuse: forwarded to resnet_v2.
        scope: variable scope name, 'resnet_v2_50' by default.

    Returns:
        Whatever resnet_v2 returns: (net, end_points).
    """
    # (scope, depth, bottleneck depth, number of stride-1 units,
    #  whether a final stride-2 unit closes the block)
    stage_specs = [
        ('block1', 256, 64, 2, True),
        ('block2', 512, 128, 3, True),
        ('block3', 1024, 256, 5, True),
        ('block4', 2048, 512, 3, False),
    ]
    blocks = []
    for name, depth, bottleneck_depth, plain_units, ends_with_stride in stage_specs:
        units = [(depth, bottleneck_depth, 1)] * plain_units
        if ends_with_stride:
            units = units + [(depth, bottleneck_depth, 2)]
        blocks.append(Block(name, bottlenect, units))
    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)

In [10]:
def time_tensorflow_run(session, target, info_string, num_batches=None):
    """Time repeated `session.run(target)` calls and print per-batch statistics.

    The first 10 iterations are warm-up and are not counted. A progress line
    is printed every 10th iteration, followed by a summary line with the mean
    and standard deviation of the measured durations.

    Args:
        session: object exposing `run(target)`.
        target: value passed to `session.run`.
        info_string: label used in the summary line.
        num_batches: number of timed iterations. When None, the module-level
            `num_batches` is used so existing three-argument calls keep
            working.

    Raises:
        NameError: if `num_batches` is None and no module-level `num_batches`
            exists.
    """
    if num_batches is None:
        # Backward compatibility: the function used to read this global.
        try:
            num_batches = globals()['num_batches']
        except KeyError:
            raise NameError("name 'num_batches' is not defined")

    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        session.run(target)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            if not i % 10:
                print ('%s: step %d, duration = %.3f' %
                       (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches
    # E[x^2] - E[x]^2 can come out slightly negative through floating-point
    # rounding when the durations are nearly identical; clamp so that sqrt
    # does not raise ValueError.
    vr = max(total_duration_squared / num_batches - mn * mn, 0.0)
    sd = math.sqrt(vr)
    print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
           (datetime.now(), info_string, num_batches, mn, sd))

In [11]:
def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    """Create the default slim arg_scope used to build the ResNet.

    Args:
        is_training: training flag handed to batch normalization.
        weight_decay: coefficient of the L2 weight regularizer.
        batch_norm_decay: decay passed to batch normalization.
        batch_norm_epsilon: epsilon passed to batch normalization.
        batch_norm_scale: scale flag passed to batch normalization.

    Returns:
        The arg_scope object yielded by the innermost of the nested scopes.
    """
    # Parameters shared by every batch-normalization layer.
    bn_settings = {
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }
    # Defaults for slim.conv2d: L2 regularization, variance-scaling
    # initialization, ReLU activation and batch normalization.
    conv_defaults = {
        'weights_regularizer': slim.l2_regularizer(weight_decay),
        'weights_initializer': slim.variance_scaling_initializer(),
        'activation_fn': tf.nn.relu,
        'normalizer_fn': slim.batch_norm,
        'normalizer_params': bn_settings,
    }

    with slim.arg_scope([slim.conv2d], **conv_defaults):
        with slim.arg_scope([slim.batch_norm], **bn_settings):
            # Original author's note: the ResNet paper uses VALID padding for
            # pooling; SAME makes feature alignment simpler.
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                # Hand back the combined (nested) scope.
                return arg_sc

In [12]:
from datetime import datetime
import math
import time

In [13]:
# Forward-pass benchmark: random input batch through ResNet-v2-50.
batch_size = 32
height = width = 224
inputs = tf.random_uniform([batch_size, height, width, 3])

# Build the network in inference mode (is_training=False).
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_50(inputs, 1000)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Number of timed iterations; time_tensorflow_run reads this module-level name.
num_batches = 100
time_tensorflow_run(sess, net, "Forward")

block.scope block1
block.arg (256, 64, 1)
输入： (32, 56, 56, 64)
shortcut.shape (32, 56, 56, 256)
residual1.shape (32, 56, 56, 64)
residual2.shape (32, 56, 56, 64)
residual3.shape (32, 56, 56, 256)
output.shape (32, 56, 56, 256)
输出： (32, 56, 56, 256)
block.arg (256, 64, 1)
输入： (32, 56, 56, 256)
shortcut.shape (32, 56, 56, 256)
residual1.shape (32, 56, 56, 64)
residual2.shape (32, 56, 56, 64)
residual3.shape (32, 56, 56, 256)
output.shape (32, 56, 56, 256)
输出： (32, 56, 56, 256)
block.arg (256, 64, 2)
输入： (32, 56, 56, 256)
shortcut.shape (32, 28, 28, 256)
residual1.shape (32, 56, 56, 64)
residual2.shape (32, 28, 28, 64)
residual3.shape (32, 28, 28, 256)
output.shape (32, 28, 28, 256)
输出： (32, 28, 28, 256)
block.scope block2
block.arg (512, 128, 1)
输入： (32, 28, 28, 256)
shortcut.shape (32, 28, 28, 512)
residual1.shape (32, 28, 28, 128)
residual2.shape (32, 28, 28, 128)
residual3.shape (32, 28, 28, 512)
output.shape (32, 28, 28, 512)
输出： (32, 28, 28, 512)
block.arg (512, 128, 1)
输入： (32, 28,