In [9]:
%run lib.ipynb
%run l2_norm.ipynb
%run default_box.ipynb

             0         1         2         3
0     0.013333  0.013333  0.100000  0.100000
1     0.013333  0.013333  0.141421  0.141421
2     0.013333  0.013333  0.200000  0.100000
3     0.013333  0.013333  0.100000  0.200000
4     0.013333  0.040000  0.100000  0.100000
...        ...       ...       ...       ...
8727  0.833333  0.833333  0.558928  1.000000
8728  0.500000  0.500000  0.880000  0.880000
8729  0.500000  0.500000  0.961249  0.961249
8730  0.500000  0.500000  1.000000  0.679706
8731  0.500000  0.500000  0.679706  1.000000

[8732 rows x 4 columns]


In [10]:
def create_vgg():
    """Assemble the VGG-style backbone as a flat ``torch.nn.ModuleList``.

    The backbone is described by a layer plan in which an integer means
    "3x3 convolution (padding 1) with that many output channels followed by
    an in-place ReLU", ``"M"`` means a 2x2/stride-2 max pool that rounds the
    output size down, and ``"MC"`` means the same pool with
    ``ceil_mode=True`` (output size rounded up).

    After the planned layers a fixed tail is appended: a 3x3/stride-1 max
    pool, a dilated 3x3 convolution (512 -> 1024, dilation 6, padding 6), a
    1x1 convolution (1024 -> 1024), each convolution followed by an in-place
    ReLU.

    returns:
        torch.nn.ModuleList holding the 35 layers in execution order
    """
    plan = (64, 64, "M",
            128, 128, "M",
            256, 256, 256, "MC",
            512, 512, 512, "M",
            512, 512, 512)

    modules = []
    prev_channels = 3  # the first convolution takes a 3-channel input

    for entry in plan:
        if isinstance(entry, str):
            # Pooling marker: only "MC" switches on ceiling rounding.
            modules.append(
                torch.nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=(entry == "MC"))
            )
            continue

        modules.append(torch.nn.Conv2d(prev_channels, entry, kernel_size=3, padding=1))
        modules.append(torch.nn.ReLU(inplace=True))
        prev_channels = entry

    # Fixed tail: stride-1 pool (keeps spatial size), dilated conv, 1x1 conv.
    modules.extend([
        torch.nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        torch.nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
        torch.nn.ReLU(inplace=True),
        torch.nn.Conv2d(1024, 1024, kernel_size=1),
        torch.nn.ReLU(inplace=True),
    ])

    return torch.nn.ModuleList(modules)

In [11]:
def create_extras():
    """Build the eight extra convolutions that follow the backbone.

    The layers alternate between a 1x1 convolution and a 3x3 convolution.
    The first two 3x3 convolutions use stride 2 with padding 1; the last two
    use the default stride and no padding. No activation modules are added
    here -- the list contains convolutions only.

    returns:
        torch.nn.ModuleList with 8 ``Conv2d`` layers; the first one expects
        1024 input channels
    """
    out_widths = [256, 512, 128, 256, 128, 256, 128, 256]
    # Each layer consumes the previous layer's output; the first consumes 1024.
    in_widths = [1024] + out_widths[:-1]

    conv_options = [
        dict(kernel_size=1),
        dict(kernel_size=3, stride=2, padding=1),
        dict(kernel_size=1),
        dict(kernel_size=3, stride=2, padding=1),
        dict(kernel_size=1),
        dict(kernel_size=3),
        dict(kernel_size=1),
        dict(kernel_size=3),
    ]

    convs = [
        torch.nn.Conv2d(n_in, n_out, **options)
        for n_in, n_out, options in zip(in_widths, out_widths, conv_options)
    ]
    return torch.nn.ModuleList(convs)

In [12]:
def create_loc_conf(num_classes=21, bbox_ratio_num=[4, 6, 6, 6, 4, 4]):
    """Build the per-source localisation and confidence prediction heads.

    One pair of 3x3 convolutions (padding 1) is created for each of the six
    feature sources. For source ``i`` the localisation head outputs
    ``bbox_ratio_num[i] * 4`` channels and the confidence head outputs
    ``bbox_ratio_num[i] * num_classes`` channels.

    parameters:
        num_classes: number of class scores predicted per box
        bbox_ratio_num: number of boxes per feature-map cell for each of the
            six sources; only the first six entries are read

    returns:
        (loc_layers, conf_layers), each a torch.nn.ModuleList of 6 ``Conv2d``
    """
    # Input channel width of source1 .. source6, in order.
    source_channels = (512, 1024, 512, 256, 256, 256)

    loc_heads = []
    conf_heads = []

    for idx, channels in enumerate(source_channels):
        boxes_per_cell = bbox_ratio_num[idx]
        # 4 box-offset values per box.
        loc_heads.append(
            torch.nn.Conv2d(channels, boxes_per_cell * 4, kernel_size=3, padding=1)
        )
        # One score per class per box.
        conf_heads.append(
            torch.nn.Conv2d(channels, boxes_per_cell * num_classes, kernel_size=3, padding=1)
        )

    return torch.nn.ModuleList(loc_heads), torch.nn.ModuleList(conf_heads)

In [15]:
# Settings shared by the SSD model and the default-box generator.
# NOTE(review): how DefaultBox interprets feature_maps / steps / min_size /
# max_size / aspect_ratios is defined in default_box.ipynb, not in this cell --
# confirm there before changing any of these values.
config = dict(
    num_classes=21,                                  # read by SSD.__init__
    input_size=300,
    bbox_aspect_num=[4, 6, 6, 6, 4, 4],              # passed to create_loc_conf (boxes per cell, per source)
    feature_maps=[38, 19, 10, 5, 3, 1],
    steps=[8, 16, 32, 64, 100, 300],                 # original note: size of default box
    min_size=[30, 60, 111, 162, 213, 264],
    max_size=[60, 111, 162, 213, 264, 315],
    aspect_ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
)

class SSD(torch.nn.Module):
    """Container module that wires together the SSD sub-networks.

    Construction builds the backbone, the extra layers, the localisation and
    confidence heads, an ``L2Norm`` layer and the list of default boxes. When
    ``phase`` is ``"inference"`` a ``Detect`` object is attached as well.

    NOTE(review): no ``forward`` method is defined in this cell.
    """

    def __init__(self, phase, config):
        """Create all sub-modules and the default boxes.

        parameters:
            phase: mode string; only the value "inference" changes what is
                built (it adds ``self.detect``)
            config: dict that must contain "num_classes" and
                "bbox_aspect_num"; the whole dict is also handed to
                ``DefaultBox``
        """
        super().__init__()
        self.phase = phase
        self.num_classes = config["num_classes"]

        # Sub-networks, registered in the same order they appear in print(model).
        self.vgg = create_vgg()
        self.extras = create_extras()
        loc_heads, conf_heads = create_loc_conf(self.num_classes, config["bbox_aspect_num"])
        self.loc = loc_heads
        self.conf = conf_heads
        self.L2Norm = L2Norm()

        # Default boxes are generated once, at construction time.
        self.dbox_list = DefaultBox(config).create_defbox()

        # The detection post-processor is only attached for inference.
        if phase == "inference":
            self.detect = Detect()

In [17]:
def decode(loc, defbox_list):
    """Turn predicted offsets plus default boxes into corner-form boxes.

    Both inputs are indexed as 2-D tensors whose first two columns describe a
    centre and whose remaining columns describe a size. The centre offset is
    scaled by 0.1 and by the default box size; the size offset is scaled by
    0.2 and applied through ``exp``. Neither input is modified.

    parameters:
        loc: predicted offsets, shape [N, 4] (e.g. [8732, 4])
        defbox_list: default boxes as [cx, cy, w, h], shape [N, 4]

    returns:
        tensor of shape [N, 4] holding [xmin, ymin, xmax, ymax]
    """
    box_centres = defbox_list[:, :2]
    box_sizes = defbox_list[:, 2:]

    # Decoded centre and width/height of every box.
    centres = box_centres + 0.1 * loc[:, :2] * box_sizes
    sizes = box_sizes * torch.exp(loc[:, 2:] * 0.2)

    # Centre form -> corner form.
    top_left = centres - sizes / 2
    bottom_right = sizes + top_left

    return torch.cat((top_left, bottom_right), dim=1)

In [16]:
if __name__ == "__main__":
    # Smoke test: build the network in training mode and show its layer tree.
    ssd = SSD(config=config, phase="train")
    print(ssd)

SSD(
  (vgg): ModuleList(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, cei