# InceptionNet

## 技巧+实现
> 同样解决无法加深问题

- google研发
    - 注重工程 同样参数 更有效率(快) 更好效果(准)
- 问题：
    - 深度网络过拟合
    - 深度网络难学习
    - 深度网络参数过多 训练慢 而且虽然由于深而稀疏 但是系数减少 运算量不会减少
    - 运算量不会减少的原因：稀疏矩阵计算比稠密矩阵计算更慢 而使用稠密计算方式则运算量不会得到优化
- v1(GoogleNet)
    - 分组卷积 400的输出通道拆成多个 100\*4 (1\*1 3\*3 5\*5 maxpooling) 最后concat
        - 分组实现：使用不同(大小)卷积kernel 提取不同特征
        - 分组实现：不相互交叉 分别计算  并行
        - 分组实现：减少计算量 通过借助1\*1进一步减少计算量
- v2 使用两个3\*3卷积替代5\*5卷积(视野域相同 参数更少)
- v3 使用1\*3 3\*1代替3\*3 进一步减少参数 拓展1\*n+n\*1
- v4 进一步引入skip connection (res残差连接)
- 实现细节：
    - 对于vi版本
    - 卷积部分可以使用**前置**1\*1 实现reduce参数
    - maxpooling通常使用**后置**1\*1 实现通道数目改变
    - 通道数目参差不齐 并非严格按照2指数次方增长

In [2]:
import tensorflow as tf
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Directory (relative to the working directory) that holds the CIFAR-10 batch files.
CIFAR_DIR = './cifar-10-batches-py'
# List the directory contents as a quick check that the data files are present.
print(os.listdir(CIFAR_DIR))

['batches.meta', 'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'readme.html', 'test_batch']


### inception block

In [12]:
def inception_block(x,output_channel_for_each_path,
                    conv_kernel_for_each_path,name):
    """Build one inception block: parallel convolutions plus a max-pooling path.

    Every path reads the same input ``x``. The convolution paths use stride 1
    and 'same' padding, so they keep the spatial size of ``x``. The pooling
    path applies a 2x2/stride-2 max pooling, zero-pads the result back to the
    spatial size of ``x`` and then applies a 1x1 convolution. All path outputs
    are concatenated along axis 3 (the last axis).

    Args:
        x: 4-D input tensor whose axes 1 and 2 (the spatial axes) have a
            statically known size.
        output_channel_for_each_path: number of filters of each convolution
            path, e.g. [16, 32, 32]. The pooling path reuses the first entry
            as the filter count of its 1x1 convolution.
        conv_kernel_for_each_path: kernel size of each convolution path,
            e.g. [(1, 1), (3, 3), (5, 5)]. Must have the same length as
            ``output_channel_for_each_path``. Kernel sizes should be distinct,
            because each layer is named after its kernel size.
        name: variable scope name that distinguishes this block from others.

    Returns:
        The concatenation of all path outputs along axis 3; its last dimension
        is ``sum(output_channel_for_each_path) + output_channel_for_each_path[0]``.

    Raises:
        ValueError: if the two per-path lists differ in length, or if the
            spatial size of ``x`` is not statically known.
    """
    if len(conv_kernel_for_each_path) != len(output_channel_for_each_path):
        raise ValueError(
            'conv_kernel_for_each_path and output_channel_for_each_path '
            'must have the same length')

    with tf.variable_scope(name):
        layers = []
        for channels, kernel in zip(output_channel_for_each_path,
                                    conv_kernel_for_each_path):
            layers.append(
                tf.layers.conv2d(x,
                                 channels,
                                 kernel,
                                 strides=(1, 1), padding='same',
                                 activation=tf.nn.relu,
                                 name='conv%d_%d' % (kernel[0], kernel[1])))

        max_pooling = tf.layers.max_pooling2d(x, (2, 2), (2, 2),
                                              name='max_pooling')

        # The stride-2 pooling shrinks axes 1 and 2; zero-pad them back to the
        # input size so the result can be concatenated with the conv paths.
        pooled_rows, pooled_cols = max_pooling.get_shape().as_list()[1:3]
        input_rows, input_cols = x.get_shape().as_list()[1:3]
        if None in (input_rows, input_cols):
            raise ValueError(
                'inception_block needs statically known spatial dimensions')
        row_gap = input_rows - pooled_rows
        col_gap = input_cols - pooled_cols
        # Split each gap as floor/ceil so that an odd gap (odd input size) is
        # still filled completely; for an even gap both sides get gap // 2.
        padded_pooling = tf.pad(max_pooling,
                                [[0, 0],
                                 [row_gap // 2, row_gap - row_gap // 2],
                                 [col_gap // 2, col_gap - col_gap // 2],
                                 [0, 0]])

        # 1x1 convolution on the pooling path so that its channel count matches
        # the first conv path (not strictly required for the concatenation).
        conved_pooling = tf.layers.conv2d(padded_pooling,
                                          output_channel_for_each_path[0],
                                          (1, 1), padding='same',
                                          activation=tf.nn.relu,
                                          name='nonlinear_conv1_1')
        layers.append(conved_pooling)

        return tf.concat(layers, axis=3)
    

        

### model 

In [15]:
# Start from an empty default graph so this cell can be re-run without name clashes.
tf.reset_default_graph()
# x: batch of flattened images, 3072 values per example.
x = tf.placeholder(tf.float32, [None, 3072])
# y: one integer class label per example, shape [None], e.g. [0,5,6,3].
y = tf.placeholder(tf.int64, [None])
# Interpret each 3072-vector as 3 planes of 32*32 values ...
x_image = tf.reshape(x, [-1, 3, 32, 32])
# ... and move the size-3 axis last, giving shape (batch, 32, 32, 3).
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
# Model definition: a 3x3 convolution stem followed by 2x2/stride-2 pooling.
conv1 = tf.layers.conv2d(x_image,32,(3,3),padding='same',
                         activation=tf.nn.relu,name='conv1')
pooling1 = tf.layers.max_pooling2d(conv1,(2,2),(2,2),name='pool1')


# Stage 2: two stacked inception blocks (three conv paths of 16 filters each
# plus the pooling path), followed by 2x2/stride-2 pooling.
inception_2a = inception_block(pooling1,[16,16,16],
                               [(1,1),(3,3),(5,5)],name='inception_2a')
inception_2b = inception_block(inception_2a,[16,16,16],
                               [(1,1),(3,3),(5,5)],name='inception_2b')
pooling2 = tf.layers.max_pooling2d(inception_2b,(2,2),(2,2),name='pool2')


# Stage 3: same layout as stage 2.
inception_3a = inception_block(pooling2,[16,16,16],
                               [(1,1),(3,3),(5,5)],name='inception_3a')
inception_3b = inception_block(inception_3a,[16,16,16],
                               [(1,1),(3,3),(5,5)],name='inception_3b')
pooling3 = tf.layers.max_pooling2d(inception_3b,(2,2),(2,2),name='pool3')

# Keep the batch dimension and flatten all other dimensions for the dense layer.
flatten = tf.layers.flatten(pooling3)
# Print the static shapes of the flattened tensor and of pooling3 as a sanity check.
print(flatten.get_shape(),pooling3.get_shape())

# Final dense layer: 10 raw logits per example (no activation is passed).
y_predict = tf.layers.dense(flatten,10,name='fc_output')

# Cross-entropy between the integer labels y and the logits.
loss = tf.losses.sparse_softmax_cross_entropy(labels=y,logits=y_predict)

# Accuracy: fraction of examples whose arg-max logit index equals the label.
correct_prediction = tf.equal(tf.argmax(y_predict,1),y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float64))

# Adam optimizer with learning rate 1e-3 minimizing the loss.
with tf.variable_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)


(?, 1024) (?, 4, 4, 64)


### load data

In [16]:
def load_data(filename):
    """Unpickle one batch file and return its ``(data, labels)`` pair.

    The file is unpickled with ``encoding='bytes'``, so the dictionary keys
    are bytes objects; the entries ``b'data'`` and ``b'labels'`` are returned.

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on batch
    files from a trusted source.
    """
    with open(filename, 'rb') as batch_file:
        batch = pickle.load(batch_file, encoding='bytes')
    images = batch[b'data']
    labels = batch[b'labels']
    return images, labels

# tensorflow.Dataset.
class CifarData:
    """In-memory dataset that hands out consecutive mini-batches.

    All given batch files are loaded, stacked and rescaled with
    ``value / 127.5 - 1``. When ``need_shuffle`` is true the examples are
    shuffled once at construction and again whenever a requested batch would
    run past the end of the data.
    """

    def __init__(self, filenames, need_shuffle):
        """Load every file in ``filenames`` and prepare the batch cursor."""
        loaded = [load_data(path) for path in filenames]
        # vstack stacks the per-file 2-D data arrays row-wise into one matrix;
        # the division rescales the values and turns them into floats.
        self._data = np.vstack([data for data, _ in loaded]) / 127.5 - 1
        # Careful: labels are 1-D per file, so they are joined with hstack.
        self._labels = np.hstack([labels for _, labels in loaded])
        print(self._data.shape)
        print(self._labels.shape)

        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        self._indicator = 0  # index of the first example of the next batch
        if self._need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        """Apply one random permutation to data and labels alike."""
        # e.g. [0,1,2,3,4,5] -> [5,3,2,4,0,1]
        order = np.random.permutation(self._num_examples)
        self._data, self._labels = self._data[order], self._labels[order]

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` examples as ``(data, labels)``.

        Raises:
            Exception: when the data is exhausted and shuffling is disabled,
                or when ``batch_size`` exceeds the number of examples.
        """
        stop = self._indicator + batch_size
        if stop > self._num_examples:
            if not self._need_shuffle:
                raise Exception("have no more examples")
            # Not enough examples left: reshuffle and restart from the front.
            self._shuffle_data()
            self._indicator = 0
            stop = batch_size
        if stop > self._num_examples:
            raise Exception("batch size is larger than all examples")
        start = self._indicator
        batch = (self._data[start:stop], self._labels[start:stop])
        self._indicator = stop
        return batch

# Training files are data_batch_1 .. data_batch_5 inside CIFAR_DIR.
train_filenames = [
    os.path.join(CIFAR_DIR, 'data_batch_%d' % batch_id)
    for batch_id in range(1, 6)
]
# The evaluation set is the single test_batch file.
test_filenames = [os.path.join(CIFAR_DIR, 'test_batch')]

# Shuffle the training data; keep the test data in file order.
train_data = CifarData(train_filenames, need_shuffle=True)
test_data = CifarData(test_filenames, need_shuffle=False)

(50000, 3072)
(50000,)
(10000, 3072)
(10000,)


### train

In [17]:
# Training script: runs train_steps mini-batch updates and periodically
# reports training and test accuracy.
# cfg = tf.ConfigProto(allow_soft_placement=True )
# cfg.gpu_options.allow_growth = True
config = tf.ConfigProto()
# Let TensorFlow grow its GPU memory allocation on demand.
config.gpu_options.allow_growth = True 

init = tf.global_variables_initializer()
batch_size = 20
train_steps = 1000
# Number of test batches per evaluation: test_steps * batch_size = 2000 examples.
test_steps = 100

# train 10k: 71.35%
with tf.Session(config=config) as sess:
    sess.run(init)
    for i in range(train_steps):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        # One optimization step; loss and accuracy are fetched for the same batch.
        loss_val, acc_val, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={
                x: batch_data,
                y: batch_labels})
        # Log the training metrics of the current batch every 50 steps.
        if (i+1) % 50 == 0:
            print('[Train] Step: %d, loss: %4.5f, acc: %4.5f' % (i+1, loss_val, acc_val))
        # Evaluate on test data every 200 steps.
        if (i+1) % 200 == 0:
            # Rebuild the test set so batching restarts at its first example;
            # a non-shuffled CifarData raises once it runs out of examples.
            # This reloads the file and prints the array shapes each time.
            test_data = CifarData(test_filenames, False)
            all_test_acc_val = []
            for j in range(test_steps):
                test_batch_data, test_batch_labels \
                    = test_data.next_batch(batch_size)
                # Only accuracy is fetched, so no training happens here.
                # The fetch is a list, so the result is a one-element list.
                test_acc_val = sess.run(
                    [accuracy],
                    feed_dict = {
                        x: test_batch_data, 
                        y: test_batch_labels
                    })
                all_test_acc_val.append(test_acc_val)
            # Average the per-batch accuracies over all evaluated test batches.
            test_acc = np.mean(all_test_acc_val)
            print('[Test ] Step: %d, acc: %4.5f' % (i+1, test_acc))

[Train] Step: 50, loss: 2.00426, acc: 0.25000
[Train] Step: 100, loss: 1.86387, acc: 0.30000
[Train] Step: 150, loss: 1.81539, acc: 0.30000
[Train] Step: 200, loss: 1.97323, acc: 0.35000
(10000, 3072)
(10000,)
[Test ] Step: 200, acc: 0.38450
[Train] Step: 250, loss: 1.51926, acc: 0.50000
[Train] Step: 300, loss: 1.56729, acc: 0.50000
[Train] Step: 350, loss: 1.64601, acc: 0.35000
[Train] Step: 400, loss: 1.46784, acc: 0.45000
(10000, 3072)
(10000,)
[Test ] Step: 400, acc: 0.43950
[Train] Step: 450, loss: 1.56069, acc: 0.50000
[Train] Step: 500, loss: 1.27828, acc: 0.65000
[Train] Step: 550, loss: 1.27106, acc: 0.40000
[Train] Step: 600, loss: 1.59575, acc: 0.30000
(10000, 3072)
(10000,)
[Test ] Step: 600, acc: 0.47100
[Train] Step: 650, loss: 1.23374, acc: 0.55000
[Train] Step: 700, loss: 1.33256, acc: 0.40000
[Train] Step: 750, loss: 1.20717, acc: 0.65000
[Train] Step: 800, loss: 1.44536, acc: 0.45000
(10000, 3072)
(10000,)
[Test ] Step: 800, acc: 0.46300
[Train] Step: 850, loss: 1.45