## Single Shot MultiBox Detector (SSD)

This notebook aims to be a Keras 2 port of Google's SSD model for object detection. We first load the model itself and all its dependencies.

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image
from scipy.misc import imread

# Jupyter notebook props to plot images properly
%matplotlib inline
plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['image.interpolation'] = 'nearest'

np.set_printoptions(suppress=True)

# Defining tensorflow GPU memory usage ratio
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
set_session(tf.Session(config=config))

In [None]:
from ssd import SSD300
from ssd_utils import BBoxUtility

# Names of the PASCAL VOC object categories, in label order. The plotting
# code looks a name up with ``voc_classes[label - 1]``, so label ids are
# 1-based with respect to this list.
voc_classes = [
    'Aeroplane', 'Bicycle', 'Bird', 'Boat',
    'Bottle', 'Bus', 'Car', 'Cat',
    'Chair', 'Cow', 'Diningtable', 'Dog',
    'Horse', 'Motorbike', 'Person', 'Pottedplant',
    'Sheep', 'Sofa', 'Train', 'Tvmonitor',
]

# One more class than there are names (presumably label 0 is the background
# class - TODO confirm against ssd.py).
NUM_CLASSES = 1 + len(voc_classes)

# Input tensor shape handed to SSD300 (presumably height, width, channels).
INPUT_SHAPE = (300, 300, 3)

### Loading weights

We have pre-trained weights for the PASCAL Visual Object Classes (VOC) dataset, which can be found [here](https://mega.nz/#F!7RowVLCL!q3cEVRK9jyOSB9el3SssIA). They must be placed in the `weights` folder.

In [None]:
# Location of the pre-trained weights file, relative to the notebook.
WEIGHTS_PATH = 'weights/weights_SSD300.hdf5'

bbox_util = BBoxUtility(NUM_CLASSES)

# Build the network outside the ``try`` so that only the weight loading is
# guarded: an unrelated failure while constructing the model must not be
# reported as "weights not found".
model = SSD300(INPUT_SHAPE, num_classes=NUM_CLASSES)

try:
    model.load_weights(WEIGHTS_PATH, by_name=True)
except OSError:
    print('[ERROR] Weights not found! They must be ' +
          'placed inside the weights folder.')
    # Re-raise after the hint: continuing would let the following cells run
    # predictions on a model whose weights were never loaded.
    raise

### Loading testing images

As our model is already pre-trained, we don't need to load training sets. This cell loads a small set of images to be used for testing/debugging.

In [None]:
def load_inputs(img_path, target_size=(300, 300)):
    """Load one image from disk, resized for the network input.

    Args:
        img_path: Path of the image file to load.
        target_size: Size passed to ``image.load_img`` as ``target_size``.
            Defaults to ``(300, 300)``, which matches the first two entries
            of ``INPUT_SHAPE`` used to build the model.

    Returns:
        The resized image converted with ``image.img_to_array``.
    """
    loaded_image = image.load_img(img_path, target_size=target_size)
    return image.img_to_array(loaded_image)

# Sample pictures used to exercise the detector.
to_be_loaded = [
    'pics/fish-bike.jpg',
    'pics/cat.jpg',
    'pics/boys.jpg',
    'pics/car_cat.jpg',
    'pics/car_cat2.jpg',
]

# Each picture is read twice: once through ``load_inputs`` to feed the
# network, and once with ``imread`` to keep an untouched copy for plotting.
inputs = [load_inputs(path) for path in to_be_loaded]
images = [imread(path) for path in to_be_loaded]

# Stack the network inputs into a single array and apply the ImageNet
# preprocessing imported from Keras.
inputs = preprocess_input(np.array(inputs))

In [None]:
%%time
# Run the model on every preprocessed image, one image per batch
# (batch_size=1). The %%time cell magic above must remain the first line of
# the cell; it reports how long the cell took to run.
preds = model.predict(inputs, batch_size=1, verbose=1)

### Presenting some samples

After running the predictions, we display the bounding boxes detected by the model.

In [None]:
# Minimum confidence a detection needs in order to be drawn.
CONF_THRESHOLD = 0.6

results = bbox_util.detection_out(preds)

# One colour per class id. The table is the same for every image, so it is
# built once here instead of once per loop iteration.
colors = plt.cm.hsv(np.linspace(0, 1, NUM_CLASSES)).tolist()

for img_idx, img in enumerate(images):
    plt.imshow(img / 255.)
    currentAxis = plt.gca()

    # Each row is read as (label, confidence, xmin, ymin, xmax, ymax).
    detections = np.asarray(results[img_idx])

    # NOTE(review): detection_out presumably yields an empty sequence for an
    # image without detections - confirm in ssd_utils. Such an image is still
    # shown, just without boxes, instead of failing on the column indexing.
    if detections.size > 0:
        # Keep only the detections that are confident enough.
        confident = detections[detections[:, 1] >= CONF_THRESHOLD]

        # Box coordinates are fractions of the image size, so they are
        # scaled by the original picture's width / height before drawing.
        img_height, img_width = img.shape[:2]

        for det in confident:
            label = int(det[0])
            score = det[1]
            xmin = int(round(det[2] * img_width))
            ymin = int(round(det[3] * img_height))
            xmax = int(round(det[4] * img_width))
            ymax = int(round(det[5] * img_height))

            # Label ids are offset by one relative to ``voc_classes``.
            label_name = voc_classes[label - 1]
            display_txt = '{:0.2f}, {}'.format(score, label_name)
            color = colors[label]

            box = plt.Rectangle((xmin, ymin), xmax - xmin + 1, ymax - ymin + 1,
                                fill=False, edgecolor=color, linewidth=2)
            currentAxis.add_patch(box)
            currentAxis.text(xmin, ymin, display_txt,
                             bbox={'facecolor': color, 'alpha': 0.5})

    plt.show()