## 定义 Inception
![](/Users/poodar/Downloads/inception_block.svg)

In [1]:
# Inception 是一个含有四条并行卷积线路的块，其基于 Network in Network 做了很大改进
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import nd

In [2]:
class Inception(nn.Block):
    """Inception block: four parallel paths joined along the channel axis.

    All four paths read the same input:

    1. 1x1 convolution
    2. 1x1 convolution -> 3x3 convolution (padding 1)
    3. 1x1 convolution -> 5x5 convolution (padding 2)
    4. 3x3 max pooling (padding 1, stride 1) -> 1x1 convolution

    Every convolution uses ReLU activation.  The paddings are chosen so
    that each path keeps the input's height and width, which is what
    allows the four results to be concatenated on ``dim=1``.

    Parameters
    ----------
    n1_1 : int
        Output channels of path 1's 1x1 convolution.
    n2_1 : int
        Output channels of path 2's 1x1 convolution.
    n2_3 : int
        Output channels of path 2's 3x3 convolution.
    n3_1 : int
        Output channels of path 3's 1x1 convolution.
    n3_5 : int
        Output channels of path 3's 5x5 convolution.
    n4_1 : int
        Output channels of path 4's 1x1 convolution.
    **kwargs
        Forwarded unchanged to ``nn.Block.__init__``.

    The concatenated output has ``n1_1 + n2_3 + n3_5 + n4_1`` channels.
    """

    def __init__(self, n1_1, n2_1, n2_3, n3_1, n3_5, n4_1, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # Create the children inside this block's name scope so their
        # parameter names are prefixed by this block, the same convention
        # GoogLeNet uses in this file.
        with self.name_scope():
            # Path 1: 1x1 convolution
            self.p1_conv_1 = nn.Conv2D(n1_1, kernel_size=1, activation='relu')
            # Path 2: 1x1 convolution + 3x3 convolution
            self.p2_conv_1 = nn.Conv2D(n2_1, kernel_size=1, activation='relu')
            # padding=1 keeps the output height/width equal to the input's
            self.p2_conv_3 = nn.Conv2D(n2_3, kernel_size=3, padding=1,
                                       activation='relu')
            # Path 3: 1x1 convolution + 5x5 convolution
            self.p3_conv_1 = nn.Conv2D(n3_1, kernel_size=1, activation='relu')
            self.p3_conv_5 = nn.Conv2D(n3_5, kernel_size=5, padding=2,
                                       activation='relu')
            # Path 4: 3x3 max pooling + 1x1 convolution
            self.p4_pool_3 = nn.MaxPool2D(pool_size=3, padding=1, strides=1)
            self.p4_conv_1 = nn.Conv2D(n4_1, kernel_size=1, activation='relu')

    def forward(self, X):
        """Run the four paths on ``X`` and concatenate them on ``dim=1``."""
        p1 = self.p1_conv_1(X)
        p2 = self.p2_conv_3(self.p2_conv_1(X))
        p3 = self.p3_conv_5(self.p3_conv_1(X))
        p4 = self.p4_conv_1(self.p4_pool_3(X))
        return nd.concat(p1, p2, p3, p4, dim=1)

In [3]:
# Smoke test: build a standalone Inception block and a random input batch.
incp = Inception(n1_1=64, n2_1=96, n2_3=128, n3_1=16, n3_5=32, n4_1=32)
incp.initialize()
# Batch of 32 examples, each with 3 channels and a 64x64 spatial size.
X = nd.random.uniform(shape=(32, 3, 64, 64))

In [4]:
# The four paths contribute 64 + 128 + 32 + 32 = 256 channels; the spatial
# size stays 64x64 (see the recorded output below).
incp(X).shape

(32L, 256L, 64L, 64L)

In [5]:
# GoogLeNet chains several Inception blocks together.
# The original paper uses multiple outputs; for simplicity only a single
# output is used here.
# To make the shape changes inside the network easy to inspect, every stage
# is its own nn.Sequential, and the stages are then chained together.
class GoogLeNet(nn.Block):
    """Simplified, single-output GoogLeNet built from Inception blocks.

    The network is a chain of six stages:

    1. 7x7 convolution (stride 2) + 3x3 max pooling (stride 2)
    2. 1x1 convolution + 3x3 convolution + 3x3 max pooling (stride 2)
    3. two Inception blocks + 3x3 max pooling (stride 2)
    4. five Inception blocks + 3x3 max pooling (stride 2)
    5. two Inception blocks + global average pooling
    6. flatten + dense classifier

    Parameters
    ----------
    num_classes : int
        Number of units of the final dense layer.
    verbose : bool, default False
        When true, ``forward`` prints the output shape of every stage.
    **kwargs
        Forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(GoogLeNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            # Block 1
            b1 = nn.Sequential()
            b1.add(
                nn.Conv2D(channels=64, kernel_size=7, strides=2, padding=3,
                          activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2)
            )

            # Block 2
            # Both convolutions need a non-linearity: without one the
            # stacked 1x1 and 3x3 convolutions collapse into a single
            # linear map.  Every other convolution here already uses ReLU.
            b2 = nn.Sequential()
            b2.add(
                nn.Conv2D(64, kernel_size=1, activation='relu'),
                nn.Conv2D(192, kernel_size=3, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2)
            )

            # Block 3
            b3 = nn.Sequential()
            b3.add(
                Inception(64, 96, 128, 16, 32, 32),
                Inception(128, 128, 192, 32, 64, 64),
                nn.MaxPool2D(pool_size=3, strides=2)
            )

            # Block 4
            b4 = nn.Sequential()
            b4.add(
                Inception(192, 96, 208, 16, 48, 64),
                Inception(160, 112, 224, 24, 64, 64),
                Inception(128, 128, 256, 24, 64, 64),
                Inception(112, 144, 288, 32, 64, 64),
                Inception(256, 160, 320, 32, 128, 128),
                nn.MaxPool2D(pool_size=3, strides=2)
            )

            # Block 5
            # Global average pooling reduces any spatial size to 1x1.  For
            # the 96x96 demo input the feature map here is 2x2, so this is
            # the same as the previous AvgPool2D(pool_size=2), but other
            # input sizes now also produce a 1x1 map.
            b5 = nn.Sequential()
            b5.add(
                Inception(256, 160, 320, 32, 128, 128),
                Inception(384, 192, 384, 48, 128, 128),
                nn.GlobalAvgPool2D()
            )

            # Block 6
            b6 = nn.Sequential()
            b6.add(
                nn.Flatten(),
                nn.Dense(num_classes)
            )

            # Chain blocks together
            self.net = nn.Sequential()
            self.net.add(b1, b2, b3, b4, b5, b6)

    def forward(self, X):
        """Feed ``X`` through the six stages in order and return the result.

        When ``self.verbose`` is true, the output shape of each stage is
        printed with a 1-based stage index.
        """
        out = X
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print("Block %d output: %s" % (i+1, out.shape))
        return out

In [6]:
# Smoke test: push one random batch through the network; with verbose=True
# the output shape of every block is printed.
net = GoogLeNet(num_classes=10, verbose=True)
net.initialize()

# Batch of 4 examples, each with 3 channels and a 96x96 spatial size.
X = nd.random.uniform(shape=(4, 3, 96, 96))
y = net(X)

Block 1 output: (4L, 64L, 23L, 23L)
Block 2 output: (4L, 192L, 11L, 11L)
Block 3 output: (4L, 448L, 5L, 5L)
Block 4 output: (4L, 832L, 2L, 2L)
Block 5 output: (4L, 1024L, 1L, 1L)
Block 6 output: (4L, 10L)
