In [1]:
from mxnet.gluon import nn 
from mxnet import nd 
class Inception(nn.Block):
    """Inception block: four parallel branches concatenated on the channel axis.

    Args:
        c1: output channels of the single 1x1 convolution (branch 1).
        c2: indexable pair; c2[0] is the channel count of the 1x1 convolution
            and c2[1] that of the 3x3 convolution which follows it (branch 2).
        c3: indexable pair; c3[0] is the channel count of the 1x1 convolution
            and c3[1] that of the 5x5 convolution which follows it (branch 3).
        c4: output channels of the 1x1 convolution that follows the 3x3
            max pooling (branch 4).
        **kwargs: forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)

        # Branch 1: a lone 1x1 convolution.
        self.p1_1 = nn.Conv2D(channels=c1, kernel_size=1, activation='relu')

        # Branch 2: 1x1 convolution followed by a 3x3 convolution; padding=1
        # compensates for the 3x3 kernel.
        self.p2_1 = nn.Conv2D(channels=c2[0], kernel_size=1,
                              activation='relu')
        self.p2_2 = nn.Conv2D(channels=c2[1], kernel_size=3, padding=1,
                              activation='relu')

        # Branch 3: 1x1 convolution followed by a 5x5 convolution; padding=2
        # compensates for the 5x5 kernel.
        self.p3_1 = nn.Conv2D(channels=c3[0], kernel_size=1,
                              activation='relu')
        self.p3_2 = nn.Conv2D(channels=c3[1], kernel_size=5, padding=2,
                              activation='relu')

        # Branch 4: 3x3 max pooling (stride 1, padding 1) followed by a 1x1
        # convolution.
        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
        self.p4_2 = nn.Conv2D(channels=c4, kernel_size=1, activation='relu')

    def forward(self, x):
        """Feed ``x`` through all four branches and join the results on axis 1."""
        branch_outputs = [
            self.p1_1(x),
            self.p2_2(self.p2_1(x)),
            self.p3_2(self.p3_1(x)),
            self.p4_2(self.p4_1(x)),
        ]
        # dim=1 is the channel axis, so the branch outputs are stacked there.
        return nd.concat(*branch_outputs, dim=1)

可以看到Inception里面有4个并行的线路.
1. 单个 1 * 1 卷积。
2. 1 * 1 卷积接上 3 * 3 卷积。通常前者的通道数少于输入通道数，这样可以减少后者的计算量。后者加上了 padding=1，使得输出的长宽与输入一致。
3. 同2，但换成了 5 * 5 卷积（相应地 padding=2）。
4. 和1类似，但卷积前用了 3 * 3 最大池化层。

最后将这四个并行线路的结果在通道这个维度上合并在一起。
测试一下： 

In [2]:
# Smoke test for a single Inception block. The constructor takes four
# arguments (c1, c2, c3, c4); c2 and c3 are pairs, so the former
# six-positional-argument call would raise a TypeError.
incp = Inception(64, (96, 128), (16, 32), 32)
incp.initialize()
x = nd.random.uniform(shape=(32, 3, 64, 64))
# Expected shape: (32, 256, 64, 64) -- 64 + 128 + 32 + 32 channels, with
# height and width unchanged.
incp(x).shape


In [3]:
class GoogLeNet(nn.Block):
    """GoogLeNet-style classifier assembled from six sequential blocks.

    Blocks 1 and 2 are plain convolution / max-pooling stages, blocks 3 to 5
    stack Inception modules, and block 6 flattens the features and applies
    the output Dense layer.

    Args:
        num_classes: number of units of the final Dense layer.
        verbose: when True, ``forward`` prints the output shape of every
            block.
        **kwargs: forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(GoogLeNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            # Block 1: 7x7 stride-2 convolution, then 3x3 stride-2 max pooling.
            b1 = nn.Sequential()
            b1.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3,
                             activation='relu'),
                   nn.MaxPool2D(pool_size=3, strides=2, padding=1))

            # Block 2: 1x1 convolution, 3x3 convolution, then max pooling.
            b2 = nn.Sequential()
            b2.add(nn.Conv2D(64, kernel_size=1, activation='relu'),
                   nn.Conv2D(192, kernel_size=3, padding=1, activation='relu'),
                   nn.MaxPool2D(pool_size=3, strides=2, padding=1))

            # Block 3: two Inception modules (256 and 480 output channels),
            # then max pooling.
            b3 = nn.Sequential()
            b3.add(Inception(64, (96, 128), (16, 32), 32),
                   Inception(128, (128, 192), (32, 96), 64),
                   nn.MaxPool2D(pool_size=3, strides=2, padding=1))

            # Block 4: five Inception modules (the last one outputs 832
            # channels), then max pooling.
            b4 = nn.Sequential()
            b4.add(Inception(192, (96, 208), (16, 48), 64),
                   Inception(160, (112, 224), (24, 64), 64),
                   Inception(128, (128, 256), (24, 64), 64),
                   Inception(112, (144, 288), (32, 64), 64),
                   Inception(256, (160, 320), (32, 128), 128),
                   nn.MaxPool2D(pool_size=3, strides=2, padding=1))

            # Block 5: two Inception modules (the last one outputs 1024
            # channels), then global average pooling.
            # A fixed AvgPool2D(pool_size=2) would average only the top-left
            # 2x2 window of the 3x3 map obtained for 96x96 inputs and would
            # not reduce larger inputs to 1x1. Global pooling uses every
            # spatial position and always yields (batch, 1024, 1, 1).
            b5 = nn.Sequential()
            b5.add(Inception(256, (160, 320), (32, 128), 128),
                   Inception(384, (192, 384), (48, 128), 128),
                   nn.GlobalAvgPool2D())

            # Block 6: flatten and map to the class scores.
            b6 = nn.Sequential()
            b6.add(
                nn.Flatten(),
                nn.Dense(num_classes)
            )

            # Chain the blocks together.
            self.net = nn.Sequential()
            self.net.add(b1, b2, b3, b4, b5, b6)

    def forward(self, x):
        """Run ``x`` through the six blocks in order and return the last output.

        When ``self.verbose`` is set, the shape produced by each block is
        printed as it is computed.
        """
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print("Block %d output: %s "%(i+1,out.shape))
        return out
    
                
            

我们看一下每个块对输出形状的改变。

In [4]:
# Build a 10-class network with verbose=True so that forward() prints the
# output shape of each of the six blocks.
net=GoogLeNet(10,verbose=True) 
net.initialize() 
# Random input of shape (4, 3, 96, 96); a single forward pass is enough to
# trigger the per-block shape printout.
x=nd.random.uniform(shape=(4,3,96,96))
y=net(x) 

Block 1 output: (4, 64, 24, 24) 
Block 2 output: (4, 192, 12, 12) 
Block 3 output: (4, 480, 6, 6) 
Block 4 output: (4, 832, 3, 3) 
Block 5 output: (4, 1024, 1, 1) 
Block 6 output: (4, 10) 


跟VGG一样我们使用了较小的输入96 * 96来加速计算 

In [5]:
import sys 
sys.path.append('..') 
import utils 
from mxnet import gluon
from mxnet import init 
import time 
# Load the data through the project-local helper with batch size 64 and
# resize=96 (presumably Fashion-MNIST images resized to 96x96 -- confirm in
# utils).
train_data,test_data=utils.load_data_fashion_mnist(
            batch_size=64,resize=96) 
# NOTE(review): try_gpu presumably returns a GPU context when one is
# available and falls back to CPU otherwise -- confirm in utils.
ctx=utils.try_gpu() 
# Fresh 10-class network (verbose left at its default of False), with
# Xavier-initialised parameters placed on ctx.
net=GoogLeNet(10) 
net.initialize(ctx=ctx,init=init.Xavier()) 
loss=gluon.loss.SoftmaxCrossEntropyLoss() 
# Plain SGD with a learning rate of 0.01 over all parameters of the network.
trainer=gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.01})
# Measure the wall-clock time of the one-epoch training call.
start=time.time()
utils.train(train_data,test_data,net,loss,trainer,ctx,num_epochs=1) 
end=time.time()
print("time is : %f"%(end-start))

aaaaaaaa
Start training on  gpu(0)
Epoch 0. Loss: 2.264, Train acc 0.26, Test acc 0.30, Time 73.1 sec
time is : 73.074108


# 结论 
GoogLeNet 加入了更加结构化的 Inception 块，使我们可以使用更大的通道数和更多的层，同时将计算量和模型大小控制在合理范围内。

# 练习 
GoogLeNet 有数个后续的版本，尝试实现它们并运行，看看有什么不一样。