- [ ] Generate default boxes
- [ ] Compute IoU
- [ ] Compute targets
- [ ] Compute Losses: Smooth L1 & Confidence Loss
- [x] SSD Model
- [ ] Initialize model with pretrained VGG weights
- [ ] Initialize model with pretrained SSD weights
- [ ] Train step
- [ ] Data Augmentation

In [1]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import VGG16

In [2]:
# vgg = VGG16(weights='imagenet', input_shape=(300, 300, 3), include_top=False)

In [80]:
def make_vgg16_layers():
    """Build the VGG16-style trunk used as the front of the SSD network.

    The trunk consists of five groups of 3x3 ReLU convolutions
    (conv1_x .. conv5_x), with a 2x2 / stride-2 'same' max-pool after each of
    the first four groups, followed by the modified tail: a 3x3 / stride-1
    pool5, a dilated 3x3 conv6 and a 1x1 conv7.

    Returns:
        A list of freshly created, unbuilt Keras layers in forward order.
    """
    def conv3x3(filters, name):
        # Every trunk convolution up to conv5_3 shares this configuration.
        return layers.Conv2D(filters, 3, padding='same', activation='relu', name=name)

    # (filters, number of stacked 3x3 convolutions) for groups 1..5
    group_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    trunk = []
    for group_no, (filters, depth) in enumerate(group_specs, start=1):
        for conv_no in range(1, depth + 1):
            trunk.append(conv3x3(filters, 'conv{}_{}'.format(group_no, conv_no)))
        # Groups 1-4 end with a halving pool; group 5 gets the special pool5 below.
        if group_no < len(group_specs):
            trunk.append(
                layers.MaxPool2D(2, 2, padding='same', name='pool{}'.format(group_no))
            )

    # Different from the original VGG16: pool5 has ksize = 3, stride = 1.
    trunk.append(layers.MaxPool2D(3, 1, padding='same', name='pool5'))
    # Atrous (dilated) 6th convolution.
    trunk.append(
        layers.Conv2D(1024, 3, padding='same', dilation_rate=6, activation='relu', name='conv6')
    )
    # 7th convolution (1x1).
    trunk.append(layers.Conv2D(1024, 1, padding='same', activation='relu', name='conv7'))
    return trunk

In [119]:
def make_extra_layers():
    """Build the extra feature layers (conv8_x .. conv11_x) that follow conv7.

    Each block is a 1x1 ReLU convolution followed by a 3x3 ReLU convolution.
    Blocks 8 and 9 use stride 2 with 'same' padding on the 3x3 convolution;
    blocks 10 and 11 use stride 1 with Keras' default 'valid' padding.

    Returns:
        A list of eight freshly created Conv2D layers in forward order.
    """
    # (block number, 1x1 filters, 3x3 filters, 3x3 strides, 3x3 padding)
    block_specs = [
        (8, 256, 512, 2, 'same'),
        (9, 128, 256, 2, 'same'),
        (10, 128, 256, 1, 'valid'),
        (11, 128, 256, 1, 'valid'),
    ]

    extras = []
    for block_no, squeeze, expand, strides, padding in block_specs:
        extras.append(
            layers.Conv2D(squeeze, 1, activation='relu', name='conv{}_1'.format(block_no))
        )
        extras.append(
            layers.Conv2D(
                expand,
                3,
                strides=strides,
                padding=padding,
                activation='relu',
                name='conv{}_2'.format(block_no),
            )
        )
    return extras

def make_conf_heads_layers(num_classes):
    """Build the six class-confidence heads, one per source feature map.

    Each head is a 3x3 'same' convolution with no activation whose channel
    count is (default boxes per location) * num_classes.

    Args:
        num_classes: number of class scores predicted per default box.

    Returns:
        A list of Conv2D layers ordered as blocks 4, 7, 8, 9, 10, 11.
    """
    # (default boxes per location, source block the head is attached to)
    boxes_per_block = [(4, 4), (6, 7), (6, 8), (6, 9), (4, 10), (4, 11)]
    return [
        layers.Conv2D(n_boxes * num_classes, 3, padding='same', name='conf_{}'.format(block))
        for n_boxes, block in boxes_per_block
    ]

def make_loc_heads_layers():
    """Build the six box-localization heads, one per source feature map.

    Each head is a 3x3 'same' convolution with no activation whose channel
    count is (default boxes per location) * 4, i.e. four values per box.

    Returns:
        A list of Conv2D layers ordered as blocks 4, 7, 8, 9, 10, 11.
    """
    values_per_box = 4
    # (default boxes per location, source block the head is attached to)
    boxes_per_block = [(4, 4), (6, 7), (6, 8), (6, 9), (4, 10), (4, 11)]
    return [
        layers.Conv2D(n_boxes * values_per_box, 3, padding='same', name='loc_{}'.format(block))
        for n_boxes, block in boxes_per_block
    ]

In [129]:
class SSD(keras.Model):
    """SSD network: VGG16-style trunk, extra conv blocks and per-feature-map heads.

    The forward pass runs the trunk and then the extra layers sequentially.
    Whenever a layer named in ``out_layers`` has just been applied, its output
    is passed to the next (confidence, localization) head pair.
    """

    def __init__(self, num_classes=20):
        """Create all sub-layers.

        Args:
            num_classes: number of class scores per default box, forwarded to
                the confidence heads.
        """
        super().__init__()

        self.vgg_layers = make_vgg16_layers()
        self.extra_layers = make_extra_layers()
        self.conf_layers = make_conf_heads_layers(num_classes)
        self.loc_layers = make_loc_heads_layers()
        # Layers whose outputs feed a head pair, in the order they are reached.
        self.out_layers = ["conv4_3", "conv7", "conv8_2", "conv9_2", "conv10_2", "conv11_2"]

    def get_conf_loc(self, fm, index):
        """Apply the ``index``-th confidence and localization heads to ``fm``.

        Returns:
            A ``(conf, loc)`` tuple of the two head outputs.
        """
        return self.conf_layers[index](fm), self.loc_layers[index](fm)

    def call(self, x):
        """Run the network on ``x`` and collect every head output.

        Returns:
            ``(conf_outputs, loc_outputs)``: two parallel lists with one entry
            per ``out_layers`` name encountered, in forward order.
        """
        conf_outputs, loc_outputs = [], []

        # Trunk first, then the extra layers; both stages are handled alike.
        for stage in (self.vgg_layers, self.extra_layers):
            for layer in stage:
                x = layer(x)
                # Trace of the layers visited (shows up when the graph is built).
                print(layer.name)
                if layer.name not in self.out_layers:
                    continue
                # Heads are consumed in order, so the next head index equals
                # the number of outputs collected so far.
                conf, loc = self.get_conf_loc(x, len(conf_outputs))
                conf_outputs.append(conf)
                loc_outputs.append(loc)

        return conf_outputs, loc_outputs

    def model(self):
        """Wrap the forward pass in a functional ``keras.Model``.

        The wrapper is built on a fixed 300x300x3 input named ``input_image``,
        which makes the layer graph available to ``summary()``.
        """
        image = layers.Input(shape=(300, 300, 3), name='input_image')
        return keras.Model(inputs=image, outputs=self.call(image))

In [130]:
# Instantiate the SSD network with the default num_classes=20.
m = SSD()

In [131]:
# Build the functional wrapper and print its layer table; building it runs
# SSD.call once, which also prints each layer name as it is applied.
m.model().summary()

conv1_1
conv1_2
pool1
conv2_1
conv2_2
pool2
conv3_1
conv3_2
conv3_3
pool3
conv4_1
conv4_2
conv4_3
pool4
conv5_1
conv5_2
conv5_3
pool5
conv6
conv7
conv8_1
conv8_2
conv9_1
conv9_2
conv10_1
conv10_2
conv11_1
conv11_2
Model: "model_12"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        [(None, 300, 300, 3) 0                                            
__________________________________________________________________________________________________
conv1_1 (Conv2D)                (None, 300, 300, 64) 1792        input_image[0][0]                
__________________________________________________________________________________________________
conv1_2 (Conv2D)                (None, 300, 300, 64) 36928       conv1_1[0][0]                    
___________________________________________________________________________

# Generate default boxes

In [5]:
# NOTE(review): presumably the spatial sizes of the feature maps used for
# default-box generation (only two listed here) — TODO confirm against the
# model's actual head inputs.
fm_sizes = [38, 19]