# ResNet 2015(kaiming He)

## 技巧+实现
> 使得更加深层网络得以实现 减少全连接层 pooling层

- 问题
    - 模型加深效果变差：不是学不到 而是难优化 难学到
- residual learning
    - 深层网络至少可以持平浅层网络
    - 残差连接 x+f(x)，x 作为 identity（恒等映射）
    - resnet34 resnet101 使用不同残差连接块 resnet101使用了前后1\*1
- 具体
    - 首先经过 stride=2 的 conv 以及 max pooling 降低尺寸
    - 之后经过4次残差连接块 每组有多个相同规模残差子结构
    - 之后通过global average pooling 而不是全连接层
    - 最后+1000fc softmax实现1000分类输出
- 减少了全连接层 从而可以在卷积层增加更多参数
- 残差结构使需要学习的知识减少 容易学习
- 每一层由于x数据分布相近 容易学习
    - 普通网络不同batch在深层上数据分布相差可能大 

## 实现细节
- 残差连接需要小心x输入 如果已经降采样 需要对x进行pooling降采样
- 每次subsampling之后通道数要翻倍
- 每次经过一个完整的残差连接块之后实现一次subsampling
- subsampling可以通过stride=2的conv / maxpooling 实现 
- 通道数目改变可以借助1\*1 

In [1]:
import tensorflow as tf
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Directory that holds the CIFAR-10 batch files; its contents are listed
# right away as a quick check that the dataset is where we expect it.
CIFAR_DIR = './cifar-10-batches-py'
print(os.listdir(CIFAR_DIR))

['batches.meta', 'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'readme.html', 'test_batch']


### residual structure

In [11]:
def residual_block(x,output_channel):
    """Build one two-convolution residual block on top of ``x``.

    Whether the block subsamples is inferred from the channel counts:
    doubling the channels means "halve the spatial size", keeping the
    channels means "keep the spatial size". Strides, the shortcut's spatial
    size and the shortcut's channel count are adjusted accordingly.

    Args:
        x: input feature map. The channel count is read from its last
            static dimension, and the shortcut padding below targets the
            fourth axis, so a 4-D channels-last tensor is expected.
        output_channel: number of channels produced by the block; must be
            equal to, or exactly twice, the input channel count.

    Returns:
        The element-wise sum of the second convolution's output and the
        (possibly pooled and zero-padded) shortcut.

    Raises:
        ValueError: if ``output_channel`` is neither the input channel
            count nor twice that count.
    """
    input_channel = x.get_shape().as_list()[-1]
    if input_channel*2 == output_channel:
        subsampling = True
        strides = (2,2)
    elif input_channel == output_channel:
        subsampling = False
        strides = (1,1)
    else:
        raise ValueError("input channel can't match output channel")
    # Only the first convolution carries the (possibly 2x2) stride.
    conv1 = tf.layers.conv2d(x,output_channel,(3,3),strides=strides,
                            padding = 'same',activation = tf.nn.relu,
                            name = 'conv1')
    conv2 = tf.layers.conv2d(conv1,output_channel,(3,3),strides=(1,1),
                            padding = 'same',activation = tf.nn.relu,
                            name = 'conv2')
    if subsampling:
        # Shrink the shortcut spatially. 'same' padding rounds the output
        # size up exactly like the stride-2 'same' convolution above, so
        # the two branches also line up for odd heights/widths ('valid'
        # rounds down and only matched for even sizes).
        pooled_x = tf.layers.average_pooling2d(x,(2,2),(2,2),
                                               padding='same')
        # Grow the shortcut's channel axis with zeros. Splitting the
        # missing channels as floor/remainder keeps the total correct
        # when the input channel count is odd.
        extra_channel = output_channel - input_channel
        pad_before = extra_channel // 2
        pad_after = extra_channel - pad_before
        shortcut = tf.pad(pooled_x,
                          [[0,0],[0,0],[0,0],
                          [pad_before,pad_after]])
    else:
        shortcut = x
    return conv2 + shortcut
def res_net(x,num_residual_blocks,num_channel_base,class_num):
    """Build a residual network and return its classification logits.

    The network is: one 3x3 convolution, then ``len(num_residual_blocks)``
    stages of residual blocks (stage ``k`` uses
    ``num_channel_base * 2**k`` channels), then global average pooling
    over the spatial axes and a dense layer with ``class_num`` units.

    Args:
        x: 4-D input tensor; axes 1 and 2 are treated as the spatial axes
            and the last axis as channels.
        num_residual_blocks: number of residual blocks in each stage,
            e.g. ``[3, 4, 6, 3]``.
        num_channel_base: output channels of the first convolution.
        class_num: number of units of the final dense layer.

    Returns:
        The logits tensor produced by the final dense layer.
    """
    layers = [] # every intermediate output, most recent last
    num_subsampling = len(num_residual_blocks)
    with tf.variable_scope('conv0'):
        conv0 = tf.layers.conv2d(x,num_channel_base,(3,3),
                            strides=(1,1),padding='same',
                            activation=tf.nn.relu,name='conv0')
        layers.append(conv0)
    # One stage per entry; the channel count doubles from stage to stage,
    # which is what makes residual_block subsample at each stage boundary.
    for resblock_id, num_blocks in enumerate(num_residual_blocks):
        stage_channel = num_channel_base*(2**resblock_id)
        for i in range(num_blocks):
            with tf.variable_scope("conv%d_%d" % (resblock_id+1,i+1)):
                conv = residual_block(layers[-1],stage_channel)
                layers.append(conv)
    # Shape sanity check. The first stage does not subsample, hence the
    # "- 1"; max() keeps the factor at 1 when no stage was requested.
    multiplier = 2**max(num_subsampling-1,0)
    input_size = x.get_shape().as_list()[1:]
    # Ceiling integer division; spatial dims that are unknown at graph
    # construction time (None) stay unknown instead of raising.
    expected_shape = [
        None if size is None else -(-size // multiplier)
        for size in input_size[:2]
    ] + [num_channel_base*multiplier]
    actual_shape = layers[-1].get_shape().as_list()[1:]
    assert actual_shape == expected_shape, \
            "unexpected feature map shape %s, expected %s" % (
                actual_shape, expected_shape)

    with tf.variable_scope('pool_and_fc'):
        # Global average pooling: averaging over axes 1 and 2 drops them.
        global_pool = tf.reduce_mean(layers[-1],[1,2])
        logits = tf.layers.dense(global_pool,class_num)
        # For a 32x32 input with four stages and a base of 32 channels the
        # three prints below show:
        #   [None, 4, 4, 256]
        #   [None, 256]
        #   [None, 10]
        print(layers[-1].get_shape().as_list())
        print(global_pool.get_shape().as_list())
        print(logits.get_shape().as_list())
        layers.append(logits)
    return layers[-1]


### ResNet model

In [12]:
# Start from an empty default graph so re-running this cell does not
# collide with variables created by a previous run.
tf.reset_default_graph()
# Each example is fed as a flat vector of 3072 floats.
x = tf.placeholder(tf.float32,[None,3072])
# Labels are fed as a 1-D vector of integer class ids, e.g. [1,2,5,3,6..]
y = tf.placeholder(tf.int64,[None])
# View each flat vector as 3 planes of 32x32, then move the plane axis
# last so the tensor is [batch, 32, 32, 3].
x_image = tf.reshape(x,[-1,3,32,32])
x_image = tf.transpose(x_image,perm=[0,2,3,1])
# Build the network: four stages with 2/3/3/2 residual blocks,
# 32 base channels, 10 output classes.
y_predict = res_net(x_image,[2,3,3,2],32,10)

# Cross-entropy computed from integer labels and raw logits.
loss = tf.losses.sparse_softmax_cross_entropy(labels=y,logits=y_predict)
# Accuracy: fraction of examples whose arg-max logit equals the label.
correct_prediction = tf.equal(tf.argmax(y_predict,1),y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float64))

with tf.name_scope('train_op'):
    # Adam with a learning rate of 1e-3.
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
    

[None, 4, 4, 256]
[None, 256]
[None, 10]


### load data

In [3]:
def load_data(filename):
    """Load one pickled batch file and return its ``(data, labels)`` pair.

    The file is unpickled with ``encoding='bytes'``, so the dictionary
    keys are byte strings (``b'data'`` and ``b'labels'``).

    NOTE: unpickling executes whatever the file encodes; only use this on
    batch files from a trusted source.
    """
    with open(filename, 'rb') as batch_file:
        batch = pickle.load(batch_file, encoding='bytes')
    features = batch[b'data']
    targets = batch[b'labels']
    return features, targets

# tensorflow.Dataset.
class CifarData:
    """In-memory batch feeder built from one or more pickled batch files.

    All files are loaded up front, their data rows are stacked into one
    matrix and rescaled with ``value / 127.5 - 1``, and their labels are
    concatenated into one vector. ``next_batch`` then hands out
    consecutive slices.
    """

    def __init__(self, filenames, need_shuffle):
        """Load every file in ``filenames``; shuffle once if requested."""
        loaded = [load_data(name) for name in filenames]
        # vstack: per-file 2-D arrays are stacked row-wise into one matrix.
        self._data = np.vstack([features for features, _ in loaded]) / 127.5 - 1
        # hstack (not vstack): labels must end up as one flat vector.
        self._labels = np.hstack([targets for _, targets in loaded])
        print(self._data.shape)
        print(self._labels.shape)

        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        self._indicator = 0  # index of the next example to hand out
        if need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        """Apply one random permutation to data and labels alike."""
        # e.g. [0,1,2,3,4,5] -> [5,3,2,4,0,1]
        order = np.random.permutation(self._num_examples)
        self._data, self._labels = self._data[order], self._labels[order]

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` examples as ``(data, labels)``.

        When the remaining examples cannot fill a batch, a shuffling
        feeder reshuffles and restarts from the beginning (the leftover
        tail is skipped); a non-shuffling feeder raises.

        Raises:
            Exception: if a non-shuffling feeder is exhausted, or if
                ``batch_size`` exceeds the total number of examples.
        """
        start = self._indicator
        stop = start + batch_size
        if stop > self._num_examples:
            if not self._need_shuffle:
                raise Exception("have no more examples")
            self._shuffle_data()
            self._indicator = 0
            start, stop = 0, batch_size
        if stop > self._num_examples:
            raise Exception("batch size is larger than all examples")
        self._indicator = stop
        return self._data[start:stop], self._labels[start:stop]

# Training files are data_batch_1 .. data_batch_5; the test split is a
# single file.
train_filenames = [
    os.path.join(CIFAR_DIR, 'data_batch_%d' % shard_id)
    for shard_id in range(1, 6)
]
test_filenames = [os.path.join(CIFAR_DIR, 'test_batch')]

# Only the training feeder shuffles; the test feeder is read in order.
train_data = CifarData(train_filenames, need_shuffle=True)
test_data = CifarData(test_filenames, need_shuffle=False)

(50000, 3072)
(50000,)
(10000, 3072)
(10000,)


### train

In [16]:
# cfg = tf.ConfigProto(allow_soft_placement=True )
# cfg.gpu_options.allow_growth = True
# Session configuration: let TensorFlow grow its GPU memory allocation on
# demand rather than reserving it all at start-up.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True 

init = tf.global_variables_initializer()
batch_size = 20
train_steps = 1000
# Each evaluation pass reads test_steps * batch_size test examples.
test_steps = 100

# Author's note from an earlier run -- train 10k: 71.35%
with tf.Session(config=config) as sess:
    sess.run(init)
    for i in range(train_steps):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        # One optimisation step; loss and accuracy are fetched for the
        # same mini-batch so they can be logged.
        loss_val, acc_val, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={
                x: batch_data,
                y: batch_labels})
        # Log training metrics every 50 steps.
        if (i+1) % 50 == 0:
            print('[Train] Step: %d, loss: %4.5f, acc: %4.5f' % (i+1, loss_val, acc_val))
        # Evaluate on the test set every 200 steps.
        if (i+1) % 200 == 0:
            # A fresh feeder is built for every evaluation so reading
            # starts at the first test example again (a non-shuffling
            # CifarData raises once it runs out of examples).
            test_data = CifarData(test_filenames, False)
            all_test_acc_val = []
            for j in range(test_steps):
                test_batch_data, test_batch_labels \
                    = test_data.next_batch(batch_size)
                # The fetch is a one-element list, so the result is a
                # one-element list as well; np.mean below flattens it.
                test_acc_val = sess.run(
                    [accuracy],
                    feed_dict = {
                        x: test_batch_data, 
                        y: test_batch_labels
                    })
                all_test_acc_val.append(test_acc_val)
            # Mean of the per-batch accuracies.
            test_acc = np.mean(all_test_acc_val)
            print('[Test ] Step: %d, acc: %4.5f' % (i+1, test_acc))

UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node conv0/conv0/Conv2D (defined at <ipython-input-11-b18bdbe21add>:48)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape, conv0/conv0/kernel/read)]]
	 [[{{node Mean/_13}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1364_Mean", tensor_type=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'conv0/conv0/Conv2D', defined at:
  File "d:\anacoda_install\Lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "d:\anacoda_install\Lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "e:\environments\mytf\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "e:\environments\mytf\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "e:\environments\mytf\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "e:\environments\mytf\lib\site-packages\tornado\platform\asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "d:\anacoda_install\Lib\asyncio\base_events.py", line 427, in run_forever
    self._run_once()
  File "d:\anacoda_install\Lib\asyncio\base_events.py", line 1440, in _run_once
    handle._run()
  File "d:\anacoda_install\Lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "e:\environments\mytf\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "e:\environments\mytf\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
    ret = callback()
  File "e:\environments\mytf\lib\site-packages\tornado\gen.py", line 787, in inner
    self.run()
  File "e:\environments\mytf\lib\site-packages\tornado\gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "e:\environments\mytf\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "e:\environments\mytf\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "e:\environments\mytf\lib\site-packages\ipykernel\kernelbase.py", line 272, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "e:\environments\mytf\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "e:\environments\mytf\lib\site-packages\ipykernel\kernelbase.py", line 542, in execute_request
    user_expressions, allow_stdin,
  File "e:\environments\mytf\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "e:\environments\mytf\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "e:\environments\mytf\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "e:\environments\mytf\lib\site-packages\IPython\core\interactiveshell.py", line 2854, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "e:\environments\mytf\lib\site-packages\IPython\core\interactiveshell.py", line 2880, in _run_cell
    return runner(coro)
  File "e:\environments\mytf\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "e:\environments\mytf\lib\site-packages\IPython\core\interactiveshell.py", line 3057, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "e:\environments\mytf\lib\site-packages\IPython\core\interactiveshell.py", line 3248, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "e:\environments\mytf\lib\site-packages\IPython\core\interactiveshell.py", line 3325, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-5790739598b2>", line 8, in <module>
    y_predict = res_net(x_image,[2,3,3,2],32,10)
  File "<ipython-input-11-b18bdbe21add>", line 48, in res_net
    activation=tf.nn.relu,name='conv0')
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\layers\convolutional.py", line 417, in conv2d
    return layer.apply(inputs)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 817, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\layers\base.py", line 374, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 757, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\keras\layers\convolutional.py", line 194, in call
    outputs = self._convolution_op(inputs, self.kernel)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 868, in __call__
    return self.conv_op(inp, filter)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 520, in __call__
    return self.call(inp, filter)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 204, in __call__
    name=self.name)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 1044, in conv2d
    data_format=data_format, dilations=dilations, name=name)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "e:\environments\mytf\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

UnknownError (see above for traceback): Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node conv0/conv0/Conv2D (defined at <ipython-input-11-b18bdbe21add>:48)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape, conv0/conv0/kernel/read)]]
	 [[{{node Mean/_13}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1364_Mean", tensor_type=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
