In [29]:
# パッケージのimport
from math import sqrt
from itertools import product

import pandas as pd
import torch
from torch.autograd import Function
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

### VGG module

In [30]:
def VGGModule():
    """Assemble the VGG-style backbone as a flat list of layers.

    The network takes a 3-channel input. In the layout below an integer is
    the output width of a 3x3 convolution (padding 1) followed by an
    in-place ReLU, 'M' is a 2x2 / stride-2 max-pool, and 'MC' is the same
    pool with ``ceil_mode=True`` (odd spatial sizes are rounded up instead
    of down). A fixed five-layer tail is appended afterwards.

    Returns:
        nn.ModuleList: 35 modules, indexable in forward order.
    """
    cfg=[64,64,'M',128,128,'M',256,256,256,'MC',512,512,512,'M',512,512,512]

    backbone = nn.ModuleList()
    channels = 3
    for spec in cfg:
        if isinstance(spec, str):
            # Pooling entry: only 'MC' switches the output size to ceil rounding.
            backbone.append(
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=(spec == 'MC'))
            )
            continue
        # Convolution entry: `spec` is the number of output channels.
        backbone.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        backbone.append(nn.ReLU(inplace=True))
        channels = spec

    # Tail: size-preserving 3x3 pool, a dilated 3x3 conv widening 512 -> 1024,
    # then a 1x1 conv, each conv followed by an in-place ReLU.
    backbone.extend([
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
        nn.ReLU(inplace=True),
        nn.Conv2d(1024, 1024, kernel_size=1),
        nn.ReLU(inplace=True),
    ])
    return backbone

In [31]:
# Sanity check: build the backbone and display its layer listing.
VGGModule()

ModuleList(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (17): Conv2d(256, 512, kernel_siz

### extra module

In [32]:
def extrasModule():
    """Assemble the extra convolution layers that follow the backbone.

    The first layer expects 1024 input channels. Layers alternate between a
    1x1 convolution and a 3x3 convolution; the first two 3x3 convolutions use
    stride 2 with padding 1, the last two use the Conv2d defaults.

    No activation modules are included in the returned list (the original
    note says ReLU is applied in the forward pass, which is not part of this
    cell).

    Returns:
        nn.ModuleList: eight ``nn.Conv2d`` layers in forward order.
    """
    # One row per layer: (output channels, kernel size, extra Conv2d options).
    layer_specs = (
        (256, 1, {}),
        (512, 3, {'stride': 2, 'padding': 1}),
        (128, 1, {}),
        (256, 3, {'stride': 2, 'padding': 1}),
        (128, 1, {}),
        (256, 3, {}),
        (128, 1, {}),
        (256, 3, {}),
    )

    extras = nn.ModuleList()
    channels = 1024  # width of the feature map these layers are fed with
    for out_channels, kernel, options in layer_specs:
        extras.append(nn.Conv2d(channels, out_channels, kernel_size=kernel, **options))
        channels = out_channels
    return extras
    

In [33]:
# Sanity check: build the extra layers and display them.
extrasModule()

ModuleList(
  (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
  (1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (2): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
  (3): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (4): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
  (5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (6): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
  (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
)

### loc conf module

In [34]:
def locConfModule(num_classes=21,bbox_aspect_num=[4,6,6,6,4,4]):
    """Build the per-level localisation and confidence prediction heads.

    Six heads of each kind are created, one per entry of the fixed input
    channel list. Every head is a 3x3 convolution with padding 1. For level
    ``i`` the localisation head outputs ``bbox_aspect_num[i] * 4`` channels
    and the confidence head outputs ``bbox_aspect_num[i] * num_classes``.

    Args:
        num_classes: multiplier for the confidence head output channels.
        bbox_aspect_num: number of boxes per location, indexed by level;
            the first six entries are read.

    Returns:
        tuple: ``(loc_layers, conf_layers)``, two ``nn.ModuleList`` objects
        of six convolutions each.
    """
    source_channels = (512, 1024, 512, 256, 256, 256)

    def head(in_channels, out_channels):
        # Every prediction head shares the same 3x3 / padding-1 geometry.
        return nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)

    loc_head, conf_head = nn.ModuleList(), nn.ModuleList()
    for level, in_channels in enumerate(source_channels):
        boxes_per_cell = bbox_aspect_num[level]
        loc_head.append(head(in_channels, boxes_per_cell * 4))
        conf_head.append(head(in_channels, boxes_per_cell * num_classes))
    return loc_head, conf_head
    

In [35]:
# Sanity check: build both prediction heads with the defaults and display them.
locConfModule()

(ModuleList(
   (0): Conv2d(512, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (1): Conv2d(1024, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (2): Conv2d(512, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (3): Conv2d(256, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (4): Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (5): Conv2d(256, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
 ),
 ModuleList(
   (0): Conv2d(512, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (1): Conv2d(1024, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (2): Conv2d(512, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (3): Conv2d(256, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (4): Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (5): Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
 ))

### L2 Norm layer
- 채널 방향 정규화

In [36]:
class L2Norm(nn.Module):
    """L2-normalise along dim 1, then apply a learnable per-channel gain.

    Args:
        input_channels: length of the learnable ``weight`` vector; it must
            match the size of dim 1 of the tensors passed to ``forward``.
        scale: constant that every entry of ``weight`` is initialised to.
    """

    def __init__(self,input_channels=512,scale=20):
        super().__init__()
        self.eps = 1e-10  # added to the norm so the division never hits zero
        self.scale = scale
        self.weight = nn.Parameter(torch.Tensor(input_channels))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill every entry of ``weight`` with ``scale``."""
        init.constant_(self.weight, self.scale)

    def forward(self,x):
        """Return ``weight * x / (||x||_2 + eps)`` with the norm taken over dim 1.

        ``x`` must be 4-dimensional: the weight vector is reshaped to
        ``(1, C, 1, 1)`` and expanded to the shape of ``x``.
        """
        channel_norm = torch.sqrt(torch.sum(x.pow(2), dim=1, keepdim=True)) + self.eps
        normalised = x / channel_norm
        gain = self.weight[None, :, None, None].expand_as(normalised)
        return gain * normalised

### Default Box Class

In [37]:
class DBox(object):
    """Generator of default boxes for a multi-level feature pyramid.

    Args:
        cfg: dict providing
            'input_size'    - side length of the input image in pixels,
            'feature_maps'  - grid size ``f`` of each level (``f * f`` cells),
            'steps'         - per-level divisor of the image size used to
                              normalise cell centres,
            'min_sizes'     - per-level side of the small square box (pixels),
            'max_sizes'     - per-level size paired with ``min_sizes`` to
                              form the large square box (pixels),
            'aspect_ratios' - per-level list of ratios for the non-square
                              boxes; each ratio yields two boxes.
    """

    def __init__(self,cfg):
        super(DBox,self).__init__()
        self.image_size=cfg['input_size']
        self.feature_maps=cfg['feature_maps']
        self.num_priors=len(cfg['feature_maps'])
        self.steps=cfg['steps']
        self.min_sizes=cfg['min_sizes']
        self.max_sizes=cfg['max_sizes']
        self.aspect_ratios=cfg['aspect_ratios']

    def make_dbox_list(self):
        """Return every default box as a row ``[cx, cy, w, h]``.

        All values are normalised by the image size and clamped to [0, 1].
        For each level the grid cells are visited row by row; each cell emits
        the small square box, the large square box, then for every aspect
        ratio ``ar`` the pair ``(s*sqrt(ar), s/sqrt(ar))`` and
        ``(s/sqrt(ar), s*sqrt(ar))``.

        Returns:
            torch.Tensor: shape ``(num_boxes, 4)``; ``(0, 4)`` when
            ``feature_maps`` is empty.
        """
        mean=[]
        for k,f in enumerate(self.feature_maps):
            # Everything below depends only on the level, not on the grid
            # cell, so it is computed once per level.
            f_k=self.image_size/self.steps[k]  # effective grid resolution
            s_k=self.min_sizes[k]/self.image_size  # small square side
            s_k_big=sqrt(s_k*(self.max_sizes[k]/self.image_size))  # large square side

            # (width, height) of every box emitted per cell, in output order.
            shapes=[(s_k,s_k),(s_k_big,s_k_big)]
            for ar in self.aspect_ratios[k]:
                sq=sqrt(ar)
                shapes.append((s_k*sq,s_k/sq))
                shapes.append((s_k/sq,s_k*sq))

            for i,j in product(range(f),repeat=2):
                # Cell centre, normalised to the 0-1 range.
                cx,cy=(j+0.5)/f_k,(i+0.5)/f_k
                for w,h in shapes:
                    mean+=[cx,cy,w,h]

        # An explicit row count (instead of -1) keeps the reshape valid when
        # no boxes were generated.
        output=torch.Tensor(mean).view(len(mean)//4,4)
        # Keep every coordinate/size inside the image.
        output.clamp_(max=1,min=0)
        return output

In [38]:
# Sanity check: generate the default boxes for a 300-pixel input
# configuration and display them as a table.
ssd_cfg = {
    'num_classes': 21,
    'input_size': 300,
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],
    'feature_maps': [38, 19, 10, 5, 3, 1],
    'steps': [8, 16, 32, 64, 100, 300],
    'min_sizes': [30, 60, 111, 162, 213, 264],
    'max_sizes': [60, 111, 162, 213, 264, 315],
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

dbx = DBox(ssd_cfg)
dbxlist = dbx.make_dbox_list()
# One row per default box.
pd.DataFrame(dbxlist.numpy())

Unnamed: 0,0,1,2,3
0,0.013333,0.013333,0.100000,0.100000
1,0.013333,0.013333,0.141421,0.141421
2,0.013333,0.013333,0.141421,0.070711
3,0.013333,0.013333,0.070711,0.141421
4,0.040000,0.013333,0.100000,0.100000
...,...,...,...,...
8727,0.833333,0.833333,0.502046,1.000000
8728,0.500000,0.500000,0.880000,0.880000
8729,0.500000,0.500000,0.961249,0.961249
8730,0.500000,0.500000,1.000000,0.622254
