# Simple implementation of SSD (Single Shot MultiBox Detector)

Paper: [SSD: Single Shot MultiBox Detector](https://arxiv.org/pdf/1512.02325.pdf)  
Inspired by https://github.com/rykov8/ssd_keras



# Datasets

**Current test set**
French Traffic lights (jpeg zipped, 468MB  | ground truth txt, 0.5MB)
http://www.lara.prd.fr/benchmarks/trafficlightsrecognition
    

**To try:**

German traffic lights 5093 images, 10756 annotated traffic lights
https://hci.iwr.uni-heidelberg.de/node/6132

VIVA traffic light detection benchmark (California): Day Train Set 12.4 GB, Day Test Set 3.8 GB
http://cvrr.ucsd.edu/vivachallenge/index.php/traffic-light/traffic-light-detection/


In [None]:
# Dataset locations, given as paths relative to the working directory.
# `ground_truth_file` is the annotation text file; `image_folder` is the
# directory that holds the extracted JPEG images.
ground_truth_file = 'Lara_UrbanSeq1_GroundTruth_GT.txt'
image_folder = 'Lara_UrbanSeq1_JPG/Lara3D_UrbanSeq1_JPG'

# Model
The original paper uses VGG for implementing the detector.


TODO: Try experimenting with Xception and MobileNet  
https://keras.io/applications/#usage-examples-for-image-classification-models


In [1]:
# Load VGG16 (ImageNet weights, no fully-connected top) as a frozen base network

from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
import numpy as np

base_model = VGG16(weights='imagenet', input_shape=(224, 224, 3),pooling=None, include_top=False)

# Freeze the pre-trained layers *before* printing the summary, so the
# trainable / non-trainable parameter counts it reports describe the model
# as it will actually be trained.
for layer in base_model.layers:
    layer.trainable = False

print('Pre-trained model loaded.')


#from keras.utils import plot_model
#plot_model(base_model, to_file='model.png')
base_model.summary()


Using TensorFlow backend.


Pre-trained model loaded.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)  

In [15]:

# Build the two-output head on top of the VGG-16 pooling layers
# (block5_pool, block4_pool and block3_pool of `base_model`).

num_classes = 4
num_coordinates = 4
from keras.layers import Dense, Conv2D, Conv2DTranspose, Add, Flatten

# Every intermediate feature map in the head uses this many channels.
head_channels = num_classes + num_coordinates


def _project_1x1(pool_name):
    """Run a 1x1 ELU convolution with `head_channels` filters on the named `base_model` layer's output."""
    return Conv2D(head_channels, 1, padding='same', data_format=None,
                  activation='elu')(base_model.get_layer(pool_name).output)


def _upsample_2x(tensor):
    """Run a 3x3 transposed ELU convolution with stride 2 and `head_channels` filters on `tensor`."""
    return Conv2DTranspose(head_channels, 3, strides=(2, 2), padding='same',
                           activation='elu')(tensor)


# Start from the deepest pooling layer and upsample it so it can be added
# element-wise to the projection of block4_pool.
layer_5_1x1 = _project_1x1('block5_pool')
expand_layer_5 = _upsample_2x(layer_5_1x1)

layer_4_1x1 = _project_1x1('block4_pool')
merge_5_and_4 = Add()([expand_layer_5, layer_4_1x1])

# Repeat one level up: upsample the merged map and add the block3_pool projection.
expand_layer_4_and_5 = _upsample_2x(merge_5_and_4)
layer_3_1x1 = _project_1x1('block3_pool')
merge_out = Add()([expand_layer_4_and_5, layer_3_1x1])

# Flatten the merged map and attach the two named output layers.
flatten_out = Flatten()(merge_out)

dense_class_out = Dense(num_classes, activation='softmax', name='class_out')(flatten_out)
dense_position_out = Dense(num_coordinates, activation='relu', name='position_out')(flatten_out)


In [16]:
# Assemble the complete network: it takes the VGG-16 input tensor and
# returns two outputs, the class prediction and the position prediction.
model = Model(
    inputs=base_model.input,
    outputs=[dense_class_out, dense_position_out],
)

In [18]:
# Print the layer-by-layer summary of the combined model, then the
# `trainable` flag of every layer (one value per line, in layer order).
model.summary()

for layer in model.layers:
    print(layer.trainable)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 224, 224, 64) 1792        input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 224, 224, 64) 36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 112, 112, 64) 0           block1_conv2[0][0]               
__________________________________________________________________________________________________
block2_con

In [None]:
# TODO: Batch generator

In [None]:
# Compile with one loss per named output. The keys of `loss` and
# `loss_weights` must be the output layer names of the model
# ('class_out' and 'position_out'); unknown keys are rejected by Keras.
model.compile(optimizer='rmsprop',
              loss={'class_out': 'categorical_crossentropy', 'position_out': 'mean_squared_error'},
              loss_weights={'class_out': 1., 'position_out': 0.5})

# Train both outputs jointly.
# NOTE(review): `images`, `labels` and `positions` are not defined in the
# visible cells — presumably they are meant to come from the batch
# generator that is still marked TODO; confirm before running.
model.fit(images, {'class_out': labels, 'position_out': positions},
          epochs=50, batch_size=32)