- [ ] Generate default boxes
- [ ] Compute IoU
- [ ] Compute targets
- [ ] Compute Losses: Smooth L1 & Confidence Loss
- [x] SSD Model
- [ ] Init Model with VGG PreTrained
- [ ] Init Model with SSD PreTrained
- [ ] Train step
- [ ] Data Augmentation

In [1]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import VGG16

In [2]:
# vgg = VGG16(weights='imagenet', input_shape=(300, 300, 3), include_top=False)

In [3]:
def make_vgg16_layers():
    """Return the VGG16-style base network as an ordered list of Keras layers.

    Blocks 1-4 are runs of 3x3 ReLU convolutions ('same' padding), each run
    closed by a 2x2 / stride-2 max-pool. Block 5 keeps its three convolutions
    but, unlike the original VGG16, is followed by a 3x3 / stride-1 pool
    (`pool5`), a dilated 3x3 convolution (`conv6`, dilation_rate=6) and a
    1x1 convolution (`conv7`).

    Returns:
        list: freshly constructed layers named `conv<block>_<n>`,
        `pool<block>`, `conv6` and `conv7`, in forward order.
    """
    def conv3x3(filters, name):
        # Every VGG convolution shares kernel size, padding and activation.
        return layers.Conv2D(filters, 3, padding='same', activation='relu', name=name)

    # (filters, number of convolutions) for the blocks that end in a 2x2 pool.
    pooled_blocks = [(64, 2), (128, 2), (256, 3), (512, 3)]

    stack = []
    for block_no, (filters, n_convs) in enumerate(pooled_blocks, start=1):
        for conv_no in range(1, n_convs + 1):
            stack.append(conv3x3(filters, 'conv{}_{}'.format(block_no, conv_no)))
        stack.append(layers.MaxPool2D(2, 2, padding='same', name='pool{}'.format(block_no)))

    for conv_no in (1, 2, 3):
        stack.append(conv3x3(512, 'conv5_{}'.format(conv_no)))

    # Different from the original VGG16: pool5 has ksize = 3, stride = 1.
    stack.append(layers.MaxPool2D(3, 1, padding='same', name='pool5'))
    # Atrous (dilated) 6th convolution.
    stack.append(
        layers.Conv2D(1024, 3, padding='same', dilation_rate=6, activation='relu', name='conv6')
    )
    # 7th convolution (1x1).
    stack.append(layers.Conv2D(1024, 1, padding='same', activation='relu', name='conv7'))
    return stack

In [4]:
def make_extra_layers():
    """Return the extra feature layers (conv8_x .. conv11_x) as an ordered list.

    Each stage is a 1x1 ReLU convolution (`conv<k>_1`) followed by a 3x3 ReLU
    convolution (`conv<k>_2`). Stages 8 and 9 use stride 2 with 'same'
    padding on the 3x3 layer; stages 10 and 11 use stride 1 with the Keras
    default padding.

    Returns:
        list: eight freshly constructed `layers.Conv2D` objects in forward
        order.
    """
    extras = []

    # Stages with a strided, 'same'-padded 3x3 conv: (stage, 1x1 filters, 3x3 filters).
    for stage, squeeze, expand in ((8, 256, 512), (9, 128, 256)):
        extras.append(
            layers.Conv2D(squeeze, 1, activation='relu', name='conv{}_1'.format(stage))
        )
        extras.append(
            layers.Conv2D(expand, 3, strides=2, padding='same', activation='relu',
                          name='conv{}_2'.format(stage))
        )

    # Stages with an unstrided 3x3 conv and no explicit padding argument.
    for stage in (10, 11):
        extras.append(
            layers.Conv2D(128, 1, activation='relu', name='conv{}_1'.format(stage))
        )
        extras.append(
            layers.Conv2D(256, 3, strides=1, activation='relu', name='conv{}_2'.format(stage))
        )

    return extras

def make_conf_heads_layers(num_classes):
    """Return the six class-confidence head convolutions, one per feature map.

    Each head is a 3x3 'same'-padded convolution without activation whose
    channel count is `boxes * num_classes`, where `boxes` is the number of
    default boxes for that map.

    Args:
        num_classes: number of class scores predicted per default box.

    Returns:
        list: `layers.Conv2D` heads named `conf_4`, `conf_7`, `conf_8`,
        `conf_9`, `conf_10`, `conf_11`, in that order.
    """
    # (source block, default boxes per location)
    boxes_per_block = ((4, 4), (7, 6), (8, 6), (9, 6), (10, 4), (11, 4))
    return [
        layers.Conv2D(n_boxes * num_classes, 3, padding='same', name='conf_{}'.format(block))
        for block, n_boxes in boxes_per_block
    ]

def make_loc_heads_layers():
    """Return the six box-regression head convolutions, one per feature map.

    Each head is a 3x3 'same'-padded convolution without activation whose
    channel count is `boxes * 4`, where `boxes` is the number of default
    boxes for that map.

    Returns:
        list: `layers.Conv2D` heads named `loc_4`, `loc_7`, `loc_8`, `loc_9`,
        `loc_10`, `loc_11`, in that order.
    """
    # (source block, default boxes per location)
    boxes_per_block = ((4, 4), (7, 6), (8, 6), (9, 6), (10, 4), (11, 4))
    return [
        layers.Conv2D(n_boxes * 4, 3, padding='same', name='loc_{}'.format(block))
        for block, n_boxes in boxes_per_block
    ]

In [8]:
class SSD(keras.Model):
    """SSD detector: base network, extra layers and per-feature-map heads.

    The forward pass runs the input through the base layers and then the
    extra layers; after every layer whose name is listed in `out_layers`,
    the matching confidence and localisation heads are applied to that
    layer's output.
    """

    def __init__(self, num_classes=20):
        """Create all layers.

        Args:
            num_classes: forwarded to `make_conf_heads_layers`.
        """
        super().__init__()

        self.vgg_layers = make_vgg16_layers()
        self.extra_layers = make_extra_layers()
        self.conf_layers = make_conf_heads_layers(num_classes)
        self.loc_layers = make_loc_heads_layers()
        # Names of the layers whose outputs feed the heads, in forward order;
        # the position in this traversal selects the head index.
        self.out_layers = ["conv4_3", "conv7", "conv8_2", "conv9_2", "conv10_2", "conv11_2"]

    def get_conf_loc(self, fm, index):
        """Apply the `index`-th confidence and localisation heads to `fm`.

        Returns:
            tuple: `(conf, loc)` head outputs.
        """
        return self.conf_layers[index](fm), self.loc_layers[index](fm)

    def call(self, x):
        """Run the network and collect the head outputs.

        Returns:
            tuple: `(conf_outputs, loc_outputs)`, two lists with one entry per
            tapped layer, in the order the layers are encountered.
        """
        conf_outputs, loc_outputs = [], []

        features = x
        for stage in (self.vgg_layers, self.extra_layers):
            for layer in stage:
                features = layer(features)
                if layer.name not in self.out_layers:
                    continue
                # Heads are consumed in order, so the next head index equals
                # the number of outputs collected so far.
                conf, loc = self.get_conf_loc(features, len(conf_outputs))
                conf_outputs.append(conf)
                loc_outputs.append(loc)

        return conf_outputs, loc_outputs

    def model(self):
        """Wrap `call` in a functional `keras.Model` with a 300x300x3 input."""
        image = layers.Input(shape=(300, 300, 3), name='input_image')
        return keras.Model(inputs=image, outputs=self.call(image))

In [9]:
# Instantiate the detector with the default num_classes=20.
m = SSD()

In [10]:
# Wrap the subclassed model in a functional keras.Model (300x300x3 input)
# and print its layer-by-layer summary.
m.model().summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        [(None, 300, 300, 3) 0                                            
__________________________________________________________________________________________________
conv1_1 (Conv2D)                (None, 300, 300, 64) 1792        input_image[0][0]                
__________________________________________________________________________________________________
conv1_2 (Conv2D)                (None, 300, 300, 64) 36928       conv1_1[0][0]                    
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 150, 150, 64) 0           conv1_2[0][0]                    
____________________________________________________________________________________________

# Generate anchor boxes

In [139]:
from collections import OrderedDict

def create_anchor_boxes(verbose=False):
    """Generate the default (anchor) boxes for the six SSD feature maps.

    For every cell of every feature map, one anchor is emitted per aspect
    ratio at the map's scale, plus one extra ratio-1 anchor whose scale is the
    geometric mean of the map's scale and the next map's scale. All values
    are fractions of the image size.

    Args:
        verbose: when True, print the anchor count, scale and ratios for each
            feature map, followed by the total number of anchors.

    Returns:
        list[list[float]]: anchors as `[cx, cy, w, h]`, ordered by feature
        map, then by cell, then by ratio (the extra ratio-1 anchor directly
        follows the regular ratio-1 anchor).
    """
    # Local import so the function does not depend on `math` having been
    # imported elsewhere in the file.
    import math

    anchors_config = OrderedDict({
        "conv4_3": {
            "size": 38,
            "scale": 0.1,
            "ratios": [1, 2, 0.5],
        },
        "conv7": {
            "size": 19,
            "scale": 0.2,
            "ratios": [1, 2, 0.5, 3, 0.33],
        },
        "conv8_2": {
            "size": 10,
            "scale": 0.375,
            "ratios": [1, 2, 0.5, 3, 0.33],
        },
        "conv9_2": {
            "size": 5,
            "scale": 0.55,
            "ratios": [1, 2, 0.5, 3, 0.33],
        },
        "conv10_2": {
            "size": 3,
            "scale": 0.725,
            "ratios": [1, 2, 0.5],
        },
        "conv11_2": {
            "size": 1,
            "scale": 0.9,
            "ratios": [1, 2, 0.5],
        },
    })

    def create_anchor(cx, cy, scale, ratio):
        # cx, cy, w, h
        root = math.sqrt(ratio)
        return [cx, cy, scale * root, scale / root]

    names = list(anchors_config)
    anchors = []

    for index, name in enumerate(names):
        config = anchors_config[name]
        fm_size = config['size']
        scale = config['scale']
        ratios = config['ratios']

        # Scale of the following feature map. The last map has no successor
        # and falls back to its own scale (explicit check instead of a bare
        # `except`, which would also have hidden unrelated errors).
        # NOTE(review): with this fallback the extra anchor of the last map
        # equals (up to float rounding) its regular ratio-1 anchor - confirm
        # whether a larger scale was intended there.
        if index + 1 < len(names):
            next_scale = anchors_config[names[index + 1]]['scale']
        else:
            next_scale = scale

        # Extra ratio-1 anchor: geometric mean of current and next map scale.
        extra_scale = math.sqrt(scale * next_scale)

        count_before = len(anchors)
        # NOTE(review): the outer index drives cx and the inner index drives
        # cy - confirm this matches how the head outputs are flattened.
        for i in range(fm_size):
            cx = (i + 0.5) / fm_size
            for j in range(fm_size):
                cy = (j + 0.5) / fm_size
                for ratio in ratios:
                    anchors.append(create_anchor(cx, cy, scale, ratio))
                    if ratio == 1:
                        anchors.append(create_anchor(cx, cy, extra_scale, ratio))
        n_anchors = len(anchors) - count_before

        if verbose:
            print("feature map =", "{:8s}".format(name),
                  "# anchors =", "{:5d}".format(n_anchors),
                  "scale =", "{:.3f}".format(scale), " ratios =", ratios)
    if verbose:
        print("# anchors = ", len(anchors))
    return anchors

In [140]:
# Build the anchor list with verbose=True so the per-feature-map counts and
# the total are printed.
anchors = create_anchor_boxes(True)

feature map = conv4_3  # anchors =  5776 scale = 0.100  ratios = [1, 2, 0.5]
feature map = conv7    # anchors =  2166 scale = 0.200  ratios = [1, 2, 0.5, 3, 0.33]
feature map = conv8_2  # anchors =   600 scale = 0.375  ratios = [1, 2, 0.5, 3, 0.33]
feature map = conv9_2  # anchors =   150 scale = 0.550  ratios = [1, 2, 0.5, 3, 0.33]
feature map = conv10_2 # anchors =    36 scale = 0.725  ratios = [1, 2, 0.5]
feature map = conv11_2 # anchors =     4 scale = 0.900  ratios = [1, 2, 0.5]
# anchors =  8732


# Visualize anchor boxes

In [142]:
# Inspect the first anchor; each entry is [cx, cy, w, h].
anchors[0]

[0.013157894736842105, 0.013157894736842105, 0.1, 0.1]