In [48]:
import torch
from torch import nn, Tensor
from torchsummary import summary
from tqdm.notebook import tqdm

* 논문 : https://arxiv.org/pdf/1409.4842

In [8]:
def ConvBlock(in_channels, out_channels, **kwargs):
    block = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, **kwargs),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace = True)
    )
    return block

block = ConvBlock(3,64, kernel_size = 1, stride = 1, padding = 0)
summary(block, (3,32,32), device = 'cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]             256
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
Total params: 384
Trainable params: 384
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 1.50
Params size (MB): 0.00
Estimated Total Size (MB): 1.51
----------------------------------------------------------------


<img src = 'https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FUm68i%2FbtrLOfE13tf%2F4y4W0KbQzyjDOkkfxvYx0K%2Fimg.png' width = 500>

In [12]:
class Inception(nn.Module):
    def __init__(self, in_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super().__init__()
        self.branch1 = nn.Sequential(
            ConvBlock(in_channels, n1x1, kernel_size = 1, stride = 1, padding = 0)
        )
        self.branch2 = nn.Sequential(
            ConvBlock(in_channels, n3x3_reduce, kernel_size = 1, stride = 1, padding = 0),
            ConvBlock(n3x3_reduce, n3x3, kernel_size = 3, stride = 1, padding = 1)
        )
        self.branch3 = nn.Sequential(
            ConvBlock(in_channels, n5x5_reduce, kernel_size = 1, stride = 1, padding = 0),
            ConvBlock(n5x5_reduce, n5x5, kernel_size = 5, stride = 1, padding = 2)
        )
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size = 3, stride = 1, padding = 1),
            ConvBlock(in_channels, pool_proj, kernel_size = 1, stride = 1, padding = 0)
        )
    
    def forward(self, x):
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        x4 = self.branch4(x)
        out = torch.cat([x1,x2,x3,x4], dim = 1)
        return out

block = Inception(3, 64, 32, 64, 32, 64, 64)
imgs = torch.randn((32,3,32,32))
print(block(imgs).shape)
summary(block, (3, 32, 32), device = 'cpu')

torch.Size([32, 256, 32, 32])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]             256
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 32, 32, 32]             128
       BatchNorm2d-5           [-1, 32, 32, 32]              64
              ReLU-6           [-1, 32, 32, 32]               0
            Conv2d-7           [-1, 64, 32, 32]          18,496
       BatchNorm2d-8           [-1, 64, 32, 32]             128
              ReLU-9           [-1, 64, 32, 32]               0
           Conv2d-10           [-1, 32, 32, 32]             128
      BatchNorm2d-11           [-1, 32, 32, 32]              64
             ReLU-12           [-1, 32, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          51,264
      Bat

<img src = 'https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FW4ED5%2FbtrL1zg5YKN%2FGksFxexWsCoWrirCv5IVj0%2Fimg.png' height = 200 width = 100>

In [17]:
class Auciliary_classifier(nn.Module):
    '''
    Linear 입력 : 128 * 4 * 4
    '''
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.block = nn.Sequential(
            nn.AvgPool2d(kernel_size = 5, stride = 3),
            nn.Conv2d(in_channels, 128, kernel_size = 1, stride = 1, padding = 0),
        )
        self.classifier = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.ReLU(inplace = True),
            nn.Dropout(0.7),
            nn.Linear(1024, num_classes)
        )
        
    def forward(self, x):
        out = self.block(x)
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

block = Auciliary_classifier(256, 1000)
summary(block, (256, 14, 14), device = 'cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         AvgPool2d-1            [-1, 256, 4, 4]               0
            Conv2d-2            [-1, 128, 4, 4]          32,896
            Linear-3                 [-1, 1024]       2,098,176
              ReLU-4                 [-1, 1024]               0
           Dropout-5                 [-1, 1024]               0
            Linear-6                 [-1, 1000]       1,025,000
Total params: 3,156,072
Trainable params: 3,156,072
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 0.08
Params size (MB): 12.04
Estimated Total Size (MB): 12.31
----------------------------------------------------------------


<img src = 'https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FZde4w%2FbtrLZyvQUKa%2FZjrA5TuJb6fZoeigUVVny1%2Fimg.png'>

<img src = 'https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdG1pOg%2FbtrLSu9ckKk%2FeffSxFoMQVez17zIt79Gv0%2Fimg.png'>

In [43]:
class GoogleNet(nn.Module):
    '''
    ImageNet기준 -> (3,224,224)
    '''
    def __init__(self, aux_logits = True, num_classes = 1000):
        super().__init__()
        assert aux_logits == True or aux_logits == False
        
        self.aux_logits = aux_logits
        
        self.front_block = nn.Sequential(
            nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 7, stride = 2, padding = 3),   # (64, 112, 112)
            nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1),                                    # (64, 56, 56)
            nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 1, stride = 1, padding = 0),  # (64, 56, 56)
            nn.Conv2d(in_channels = 64, out_channels = 192, kernel_size = 3, stride = 1, padding = 1), # (192, 56, 56)
            nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)                                     # (192, 28, 28)
        )
        
        self.inception_a3 = Inception(192, 64, 96, 128, 16, 32, 32)                                     # (256, 28, 28) out_channels -> 256 = 64 + 128 + 32 + 32
        self.inception_b3 = Inception(256, 128, 128, 192, 32, 96, 64)                                   # (480, 28, 28) out_channels -> 480 = 128 + 192 + 96 + 64
        self.maxpool3 = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)                          # (480, 14, 14)
        self.inception_a4 = Inception(480, 192, 96, 208, 16, 48, 64)                                    # (512, 14, 14) out_channels -> 512 = 192 + 208 + 48 + 64
        self.inception_b4 = Inception(512, 160, 112, 224, 24, 64, 64)                                   # (512, 14, 14) out_channels -> 512 = 160 + 224 + 64 + 64
        self.inception_c4 = Inception(512, 128, 128, 256, 24, 64, 64)                                   # (512, 14, 14) out_channenls -> 512 =  128 + 256 + 64 + 64
        self.inception_d4 = Inception(512, 112, 144, 288, 32, 64, 64)                                   # (538, 14, 14) out_channels -> 528 = 112 + 288 + 64 + 64
        self.inception_e4 = Inception(528, 256, 160, 320, 32, 128, 128)                                 # (832, 14, 14) out_channels ->  832 = 256 + 320 + 128 + 128
        self.maxpool4 = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)                          # (832, 7, 7)
        self.inception_a5 = Inception(832, 256, 160, 320, 32, 128, 128)                                 # (832, 7, 7) out_channels ->  832 = 256 + 320 + 128 + 128
        self.inception_b5 = Inception(832, 384, 192, 384, 48, 128, 128)                                 # (1024, 7, 7) out_channels ->  1024 = 384 + 384 + 128 + 128
        self.avg = nn.AvgPool2d(kernel_size = 7, stride = 1)                                            # (1024, 1, 1)
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)                                                          # (1000)
        
        if self.aux_logits:
            self.aux1 = Auciliary_classifier(512, num_classes)
            self.aux2 = Auciliary_classifier(528, num_classes)
        else:
            self.aux1 = None
            self.aux2 = None
    
    def _init_layer(self):
        for m in self.module():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, 0, 0.01)
                    nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        out = self.front_block(x)
        out = self.inception_a3(out)
        out = self.inception_b3(out)
        out = self.maxpool3(out)
        out = self.inception_a4(out)
        if self.aux_logits and self.training:
            aux1 = self.aux1(out)
        out = self.inception_b4(out)
        out = self.inception_c4(out)
        out = self.inception_d4(out)
        if self.aux_logits and self.training:
            aux2 = self.aux2(out)
        out = self.inception_e4(out)
        out = self.maxpool4(out)
        out = self.inception_a5(out)
        out = self.inception_b5(out)
        out = self.avg(out)
        out = torch.flatten(out,1)
        out = self.dropout(out)
        out = self.fc(out)
        if self.aux_logits and self.training:
            return out, aux1, aux2
        else:
            return out

In [38]:
block = nn.AvgPool2d(kernel_size = 7, stride = 1)
with torch.no_grad():
    block.eval()
    print(block(torch.randn((1024,7,7))).shape)

torch.Size([1024, 1, 1])


In [37]:
384 + 384 + 128 + 128

1024

In [46]:
model = GoogleNet()
summary(model, (3,224,224), device = 'cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,472
         MaxPool2d-2           [-1, 64, 56, 56]               0
            Conv2d-3           [-1, 64, 56, 56]           4,160
            Conv2d-4          [-1, 192, 56, 56]         110,784
         MaxPool2d-5          [-1, 192, 28, 28]               0
            Conv2d-6           [-1, 64, 28, 28]          12,352
       BatchNorm2d-7           [-1, 64, 28, 28]             128
              ReLU-8           [-1, 64, 28, 28]               0
            Conv2d-9           [-1, 96, 28, 28]          18,528
      BatchNorm2d-10           [-1, 96, 28, 28]             192
             ReLU-11           [-1, 96, 28, 28]               0
           Conv2d-12          [-1, 128, 28, 28]         110,720
      BatchNorm2d-13          [-1, 128, 28, 28]             256
             ReLU-14          [-1, 128,

In [47]:
imgs = torch.randn((8,3,224,224), device = 'cpu')
y_hat, aux1, aux2 = model(imgs)
print(y_hat.shape, aux1.shape, aux2.shape)
model.eval()
y_hat = model(imgs)
print(y_hat.shape)

torch.Size([8, 1000]) torch.Size([8, 1000]) torch.Size([8, 1000])
torch.Size([8, 1000])


## 학습 코드작성
* VGGNet, ResNet과 달리 auxiliary가 있기 때문에 훈련 시 criterion은 aux1, aux2를 포함하여 역전파 수행
* 평가 시 criterion은 y_hat만을 사용하여 평가함!

In [None]:
def training(self, model, data_loader, criterion, optimizer, device, batch_size):
    model.train()
    train_loss = 0.
    train_acc = 0.
    with tqdm(data_loader, unit = 'batch') as tepoch:
        for i, (X, y) in enumerate(tepoch):
            tepoch.set_description('Training')
            X = X.to(device)
            y = y.to(device)
            
            y_hat, aux1, aux2 = model(X)
            y_hat_loss = criterion(y_hat, y)
            aux1_loss = criterion(y_hat, y)
            aux2_loss = criterion(y_hat, y)
            loss = y_hat_loss + 0.3 * (aux1_loss + aux2_loss)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            iter_loss = loss.item()
            train_loss += iter_loss
            
            pred = y_hat.max(1, keepdim = True)[1]
            iter_acc = pred.eq(y.data.view_as(pred)).sum().item()
            train_acc += iter_acc
            
            tepoch.set_postfix(iteration_num = f'[{i}/{len(data_loader)}]',
                              train_iter_loss = f'{iter_loss / batch_size:.3f}',
                              train_iter_accuracy = f'{iter_acc / batch_size*100:.2f}%')
            
    train_loss = train_loss / len(data_loader)
    train_acc = train_acc / len(data_loader)
    return train_loss, train_acc

def evaluation(self, model, data_loader, criterion, device, batch_size):
    model.eval()
    valid_loss = 0.
    valid_acc = 0.
    with torch.no_grad():
        with tqdm(data_loader, unit = 'batch') as tepoch:
            for i, (X, y) in enumerate(tepoch):
                tepoch.set_description('Evaluation')
                X = X.to(device)
                y = y.to(device)
                
                y_hat = model(X)
                loss = criterion(y_hat, y)
                
                iter_loss = loss.item()
                valid_loss += iter_loss
                
                pred = y_hat.max(1, keepdim = True)[1]
                iter_acc = pred.eq(y.data.view_as(pred)).sum().item()
                valid_acc += iter_acc
                
                tepoch.set_postfix(iteration_num = f'[{i}/{len(data_loader)}]',
                                  valid_iter_loss = f'{iter_loss / batch_size:.3f}',
                                  valid_iter_accuracy = f'{iter_acc / batch_size*100:.2f}%')
                
    valid_loss = valid_loss / len(data_loader)
    valid_acc = valid_acc / len(data_loader)
    return valid_loss, valid_acc