In [1]:
from ssd.model import  ResNet
import torch.nn as nn
import torch
from torchvision.models.resnet import  resnet50
from torchsummary import summary
from ssd.model import SSD300

In [22]:
class SSD300(nn.Module):
    def __init__(self, backbone=ResNet("resnet50")):
        super().__init__()
        self.feature_extractor = backbone

        self.label_num = 81  # number of COCO classes
        self._build_additional_features(self.feature_extractor.out_channels)
        self.num_defaults = [4, 6, 6, 6, 4, 4]
        self.loc = []  # get loc from feat map
        self.conf = []  # get prob class

        for nd, oc in zip(self.num_defaults, self.feature_extractor.out_channels):
            self.loc.append(nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))
            self.conf.append(
                nn.Conv2d(oc, nd * self.label_num, kernel_size=3, padding=1)
            )

        self.loc = nn.ModuleList(self.loc)
        self.conf = nn.ModuleList(self.conf)
        

    def _build_additional_features(self, input_size):
        self.additional_blocks = []
        for i, (input_size, output_size, channels) in enumerate(
            zip(input_size[:-1], input_size[1:], [256, 256, 128, 128, 128])
        ):
            if i < 3:
                layer = nn.Sequential(
                    nn.Conv2d(input_size, channels, kernel_size=1, bias=False),
                    nn.BatchNorm2d(channels),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(
                        channels,
                        output_size,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        bias=False,
                    ),
                    nn.BatchNorm2d(output_size),
                    nn.ReLU(inplace=True),
                )
            else:
                layer = nn.Sequential(
                    nn.Conv2d(input_size, channels, kernel_size=1, bias=False),
                    nn.BatchNorm2d(channels),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(channels, output_size,
                              kernel_size=3, bias=False),
                    nn.BatchNorm2d(output_size),
                    nn.ReLU(inplace=True),
                )

            self.additional_blocks.append(layer)
        self.additional_blocks = nn.ModuleList(self.additional_blocks)
    
    def bbox_view(self, src, loc, conf):
        ret = []
        for s, l, c in zip(src, loc, conf):
            print(l(s).shape)
            ret.append(
                (
                    l(s).reshape(s.size(0), 4, -1),
                    c(s).reshape(s.size(0), self.label_num, -1),
                )
            )
            break




    def forward(self, x):
        x = self.feature_extractor(x)

        detection_feed = [x]
        for l in self.additional_blocks:
            x = l(x)
            detection_feed.append(x)  # fter con layer

        # Feature Map 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4
        
        self.bbox_view(detection_feed, self.loc, self.conf)
        # For SSD 300, shall return nbatch x 8732 x {nlabels, nlocs} results
        return detection_feed
    
model = SSD300()

model.eval()
inp = torch.rand((1,3,300,300))
d1 = model(inp)
print(d1[5].shape)

torch.Size([1, 16, 38, 38])
torch.Size([1, 256, 1, 1])


In [23]:
38 * 38 * 16 / 4

5776.0

In [18]:
model = SSD300()

model.eval()
inp = torch.rand((1,3,300,300))
d1 = model(inp)
print(d1[5].shape)

torch.Size([1, 256, 1, 1])


In [None]:
model.additional_blocks

In [None]:
#model.feature_extractor
#summary(model.feature_extractor, (3, 300, 300))

In [None]:
backbone = ResNet()


summary(backbone, (3, 300, 300))

In [None]:
backbone1 = resnet50()
feature_extractor = nn.Sequential(*list(backbone1.children())[:7])
summary(feature_extractor, (3, 300, 300))