In [26]:
# https://github.com/pytorch/examples/blob/master/mnist/main.py
from __future__ import print_function 
import argparse 
import torch 
from torch import cuda
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim 
from torchvision import datasets, transforms 
from torch.autograd import Variable

import numpy as np

In [None]:
# Training settings
batch_size = 64
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
# NOTE(review): none of the visible cells move the model or the batches to
# `device` -- confirm whether that is intended.
device = 'cuda' if cuda.is_available() else 'cpu'

# MNIST dataset. With download=False torchvision does not fetch anything, so the
# files are expected to be present under ./data/ already.
train_dataset = datasets.MNIST('./data/', train=True, download=False,
                               transform=transforms.ToTensor())
test_dataset = datasets.MNIST('./data/', train=False,
                              transform=transforms.ToTensor())

# Data loaders (input pipeline): only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
# Model generation
class InceptionA(nn.Module):
    """Inception-style block: four parallel branches joined along the channel axis.

    Every branch keeps the spatial size of its input (1x1 convolutions, padded
    3x3 / 5x5 convolutions, and a stride-1 padded average pool), so the branch
    outputs can be concatenated on dim 1. The result always has
    16 + 24 + 24 + 24 = 88 channels.

    Args:
        in_channels: number of channels of the incoming feature map.
    """

    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        # A 1x1 convolution (kernel_size=1) mixes channels at each pixel: it is a
        # per-position dot product across the depth dimension.

        # Branch 1: a single 1x1 convolution.
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        # Branch 2: 1x1 reduction followed by a 5x5 convolution.
        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        # Branch 3: 1x1 reduction followed by two stacked 3x3 convolutions.
        self.branch3x3db1_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3db1_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3db1_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        # Branch 4: 1x1 convolution applied after average pooling (see forward).
        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on the channel dim."""
        out_1x1 = self.branch1x1(x)

        out_5x5 = self.branch5x5_2(self.branch5x5_1(x))

        out_3x3 = self.branch3x3db1_1(x)
        out_3x3 = self.branch3x3db1_3(self.branch3x3db1_2(out_3x3))

        pooled = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        out_pool = self.branch_pool(pooled)

        # torch.cat(tensors, dim=0, out=None) -> Tensor
        # dim (int, optional): the dimension over which the tensors are concatenated.
        # With dim=0 the inputs are stacked one after another:
        #   [[x row 1], [x row 2], [y row 1], [y row 2], [z row 1], [z row 2]]
        # With dim=1 matching rows are joined side by side:
        #   [[x row 1, y row 1, z row 1], [x row 2, y row 2, z row 2]]
        return torch.cat([out_1x1, out_5x5, out_3x3, out_pool], 1)
        
        
class Net(nn.Module):
    """MNIST classifier: two conv/pool stages, each followed by an InceptionA block.

    The final linear layer is sized for 1x28x28 inputs:
    conv1 (k=5) -> 24x24, max-pool -> 12x12, conv2 (k=5) -> 8x8, max-pool -> 4x4.
    The last inception block emits 88 channels, so 88 * 4 * 4 = 1408 features.

    forward() returns per-class log-probabilities of shape (batch, 10), meant to
    be paired with ``F.nll_loss``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        # 88 input channels = InceptionA output width (16 + 24 + 24 + 24).
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)

        self.incept1 = InceptionA(in_channels=10)
        self.incept2 = InceptionA(in_channels=20)

        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(1408, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))   # conv1 -> max-pool -> relu
        x = self.incept1(x)
        x = F.relu(self.mp(self.conv2(x)))   # conv2 -> max-pool -> relu
        x = self.incept2(x)
        x = x.view(in_size, -1)              # flatten everything but the batch dim
        x = self.fc(x)
        # Normalise over the class dimension explicitly; calling log_softmax
        # without `dim` is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)
    
# Model initialization: build the network and an SGD optimizer (with momentum) over its parameters.
model = Net()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)

# train 
def train(epoch):
    """Run one training epoch over ``train_loader`` and log progress.

    Works on the notebook-level ``model``, ``optimizer`` and ``train_loader``.
    On every 10th batch it prints the epoch, the number of samples seen so far
    and the loss of the current batch.

    Args:
        epoch: epoch number; only used in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Tensors carry autograd state themselves, so the deprecated Variable
        # wrapper is not needed.
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
# test            
def test():
    model.eval()
    test_loss = 0 
    correct = 0 
    for data, target in test_loader: 
        data, target = Variable(data, volatile = True), Variable(target) 
                                    # volatile ?? 
        output = model(data) 
        # sum up batch loss 
        test_loss += F.nll_loss(output, target, size_average=False).data[0]
                                                # size_average ??? 
        #get the index of the max log-probability 
        pred = output.data.max(1,keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
# main    
# Compare by value: `is` tests object identity, which for strings depends on
# interning and triggers a SyntaxWarning on Python 3.8+.
if __name__ == '__main__':
    # range(1, 10) runs epochs 1 through 9, evaluating after each one.
    for epoch in range(1, 10):
        train(epoch)
        test()

In [41]:
```python
# Concatenating on dim=1 places the three 6x3 tensors side by side (6x9 result).
x = torch.randn(6, 3)
y = torch.zeros(6, 3)
z = torch.ones(6, 3)
torch.cat([x, y, z], dim=1)

tensor([[-0.2736, -0.0132,  0.1037,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,
          1.0000],
        [ 2.4795,  0.2064,  0.4394,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,
          1.0000],
        [ 0.4044, -0.3779,  0.6910,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,
          1.0000],
        [-0.8426, -0.5520,  0.1906,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,
          1.0000],
        [ 0.1362, -0.6483,  0.1413,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,
          1.0000],
        [ 0.3161, -0.6680,  2.5854,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,
          1.0000]])

![inception_architecture](./img/inception-module_.jpg)

![inception module](./img/inception-module.jpg)

image 출처: https://towardsdatascience.com/a-simple-guide-to-the-versions-of-the-inception-network-7fc52b863202

![stacking layer problem](./img/stacking_layer_problem.jpg)

![deep residual layer](./img/sol1_depp_residual_layer.jpg)

![ImageNet experiments](./img/Imagenet_experiments.jpg)

image 출처: https://www.youtube.com/watch?v=hqYfqNAQIjE&list=PLlMkM4tgfjnJ3I-dbhO9JTw7gNty6o_2m&index=11