# SSD300 Training Tutorial

This tutorial explains how to train an SSD300 on the Pascal VOC datasets. The preset parameters reproduce the training of the original SSD300 "07+12" model. Training SSD512 works similarly, so there's no extra tutorial for that. The same goes for training on other datasets.

You can find a summary of a full training here to get an impression of what it should look like:
[SSD300 "07+12" training summary](https://github.com/pierluigiferrari/ssd_keras/blob/master/training_summaries/ssd300_pascal_07%2B12_training_summary.md)

In [1]:
from keras.optimizers import Adam, SGD
from keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger, EarlyStopping, TensorBoard
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt
from keras.models import Model
from matplotlib import pyplot as plt
from keras.preprocessing import image
from imageio import imread

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss_mod import SSDLoss
from keras_loss_function.keras_ssd_loss_proj import SSDLoss_proj

from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_input_encoder_mod import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize_Modified
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels_Modified
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation_modified
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation

from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
from bounding_box_utils.bounding_box_utils import iou, convert_coordinates
from ssd_encoder_decoder.matching_utils import match_bipartite_greedy, match_multi
import random
# Print arrays in full, without summarization. `threshold=np.nan` is rejected
# by current NumPy releases; `sys.maxsize` is the documented replacement.
import sys
np.set_printoptions(threshold=sys.maxsize)
import tensorflow as tf
np.random.seed(1337)
%matplotlib inline

Using TensorFlow backend.


## 0. Preliminary note

All places in the code where you need to make any changes are marked `TODO` and explained accordingly. All code cells that don't contain `TODO` markers just need to be executed.

## 1. Set the model configuration parameters


In [2]:
# --- Input image geometry ---------------------------------------------------
# Height, width and number of color channels of the model input images.
img_height = 300
img_width = 600
img_channels = 3

# --- Input preprocessing ----------------------------------------------------
# The per-channel mean of the images in the dataset. Do not change this value
# if you're using any of the pre-trained weights.
mean_color = [123, 117, 104]
# The color channel order in the original SSD is BGR, so we'll have the model
# reverse the color channel order of the input images.
swap_channels = [2, 1, 0]

# --- Classes ----------------------------------------------------------------
# Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
n_classes = 1

# --- Anchor boxes -----------------------------------------------------------
# The anchor box scaling factors used in the original SSD300 for the
# Pascal VOC datasets...
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
# ...and for the MS COCO datasets.
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
# The scaling factors actually used below.
scales = scales_pascal

# The anchor box aspect ratios used in the original SSD300, one list per
# predictor layer; the order matters.
aspect_ratios = [
    [1.0, 2.0, 0.5],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
    [1.0, 2.0, 0.5],
    [1.0, 2.0, 0.5],
]
# Forwarded unchanged as the `two_boxes_for_ar1` argument of the model builder
# and of the input encoder.
two_boxes_for_ar1 = True

# The space between two adjacent anchor box center points for each predictor
# layer.
steps = [8, 16, 32, 64, 100, 300]
# The offsets of the first anchor box center points from the top and left
# borders of the image as a fraction of the step size for each predictor layer.
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
# Whether or not to clip the anchor boxes to lie entirely within the image
# boundaries.
clip_boxes = False
# The variances by which the encoded target coordinates are divided as in the
# original implementation.
variances = [0.1, 0.1, 0.2, 0.2]
# Forwarded unchanged as the `normalize_coords` argument of the model builder
# and of the input encoder.
normalize_coords = True

## 2. Build or load the model

You will want to execute either of the two code cells in the subsequent two sub-sections, not both.

In [3]:
# 1: Build the Keras model.

K.clear_session() # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)

# 2: Load some weights into the model.

# TODO: Set the path to the weights you want to load.
weights_path = 'weights/VGG_ILSVRC_16_layers_fc_reduced.h5'

# `by_name=True` is passed so that weights are assigned by layer name.
model.load_weights(weights_path, by_name=True)

# 3: The optimizer and the loss functions are instantiated, and the model is
#    compiled, in a separate cell further below.
#
# A stray, indented `model = Model(input=[x,geo_1,geo_2], ...)` statement used
# to follow here. It referenced names that are not defined in this notebook
# (`x`, `geo_1`, `geo_2`, `predictions`, `predictions_proj`), was an
# indentation error at top level, and would have replaced the model built
# above, so it has been removed.


In [4]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 300, 600, 3)   0                                            
____________________________________________________________________________________________________
input_2 (InputLayer)             (None, 300, 600, 3)   0                                            
____________________________________________________________________________________________________
identity_layer__1 (Lambda)       (None, 300, 600, 3)   0           input_1[0][0]                    
____________________________________________________________________________________________________
identity_layer__2 (Lambda)       (None, 300, 600, 3)   0           input_2[0][0]                    
___________________________________________________________________________________________

In [5]:
def gt_rem(pred, gt):
    """Cut `gt` down to the length of `pred` along axis 1.

    Returns the slice of `gt` that starts at index `[0, 0, 0]` and has size
    `[1, tf.shape(pred)[1], 18]`: a single entry along axis 0, as many entries
    along axis 1 as `pred` has, and 18 entries along the last axis.

    The previously computed (and never used) length difference has been
    dropped; it only added a dead op to the graph.
    """
    n_rows = tf.shape(pred)[1]
    return tf.slice(gt, [0, 0, 0], [1, n_rows, 18], name="rem_slice")

def gt_add(pred, gt):
    """Lengthen `gt` along axis 1 by prepending a copy of its own leading rows.

    The number of rows to add is `tf.shape(pred)[1] - tf.shape(gt)[1]`. That
    many rows (single entry along axis 0, 18 entries along the last axis) are
    sliced from the start of `gt` and concatenated in front of `gt` on axis 1.
    """
    n_missing = tf.subtract(tf.shape(pred)[1], tf.shape(gt)[1], name="gt_add_subtract")
    padding = tf.slice(gt, [0, 0, 0], [1, n_missing, 18], name="add_slice")
    return K.concatenate([padding, gt], axis=1)

def equalalready(gt, pred):
    """Pass-through branch: return the second argument untouched.

    The first argument is accepted but ignored.
    """
    return pred

def make_equal(pred, gt):
    """Return `gt` resized along axis 1 to the length of `pred`.

    Builds a `tf.cond`: when `pred` has fewer entries along axis 1 than `gt`,
    `gt_rem` is applied, otherwise `gt_add` is applied.
    """
    pred_rows = tf.shape(pred)[1]
    gt_rows = tf.shape(gt)[1]
    return tf.cond(pred_rows < gt_rows,
                   lambda: gt_rem(pred, gt),
                   lambda: gt_add(pred, gt),
                   name="make_equal_cond")



# ssd_loss3 = SSDLoss_proj(neg_pos_ratio=3, alpha=1.0)
# ssd_loss4 = SSDLoss_proj(neg_pos_ratio=3, alpha=1.0)

def Accuracy(y_true, y_pred):
    """Fraction of positions where the arg-max of target and prediction agree.

    Both tensors are first restricted to their first 18 channels; the last four
    of those are then dropped, and the arg-max is taken over the remaining
    channels along the last axis. The mean of the element-wise equality of the
    two arg-max tensors is returned.

    Both inputs are printed when the function is called (i.e. when the metric
    is built).
    """
    print("y_pred: ",y_pred)
    print("y_true: ",y_true)

    target_head = y_true[:,:,:18]
    predicted_head = y_pred[:,:,:18]

    target_labels = K.argmax(target_head[:,:,:-4], axis=-1)
    predicted_labels = K.argmax(predicted_head[:,:,:-4], axis=-1)
    return K.mean(K.equal(target_labels, predicted_labels))

def Accuracy_Proj(y_pred, y_true):
    '''Accuracy-style metric computed on the tensors returned by `matcher`.

    Both inputs are split at channel 18 into a first part (`[:18]`) and a
    second part (`[18:]`). The four pieces are handed to `matcher`, and the
    returned pair is compared the same way `Accuracy` does it: the mean of the
    element-wise equality of the arg-max over all but the last four channels.

    NOTE(review): `matcher` is not defined at module level in the visible
    code (a function of that name only exists nested inside `compute_loss`),
    so calling this function presumably raises `NameError` - confirm.
    NOTE(review): the parameters are declared as `(y_pred, y_true)`, whereas
    `Accuracy` declares `(y_true, y_pred)` - confirm which order callers use.
    NOTE(review): the nested `matcher` declares its parameters as
    `(y_true_1, y_true_2, y_pred_1, y_pred_2, bsz)`, but the call below passes
    `(y_true_1, y_pred_1, y_true_2, y_pred_2, 1)` - confirm the intended order.
    '''
    # Split targets and predictions at channel 18.
    y_true_1 = y_true[:,:,:18]
    y_pred_1 = y_pred[:,:,:18]
    y_true_2 = y_true[:,:,18:]
    y_pred_2 = y_pred[:,:,18:]
    # NOTE(review): `acc` is never read afterwards.
    acc = tf.constant(0)
    # Rebinds `y_pred` / `y_true` to the matcher outputs (batch size fixed to 1).
    y_pred, y_true = matcher(y_true_1,y_pred_1,y_true_2,y_pred_2,1)

    return K.mean(K.equal(K.argmax(y_true[:,:,:-4], axis=-1),
                  K.argmax(y_pred[:,:,:-4], axis=-1)))


# Optimizer used for compiling the model.
adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

# One loss object per model output: `SSDLoss` for the two detection outputs,
# `SSDLoss_proj` for the two projection outputs.
ssd_loss1 = SSDLoss(neg_pos_ratio=3, alpha=1.0)
ssd_loss2 = SSDLoss(neg_pos_ratio=3, alpha=1.0)
ssd_loss3 = SSDLoss_proj(neg_pos_ratio=3, alpha=1.0)
ssd_loss4 = SSDLoss_proj(neg_pos_ratio=3, alpha=1.0)

# Loss function per output layer name.
losses = {
    "predictions_1": ssd_loss1.compute_loss,
    "predictions_2": ssd_loss2.compute_loss,
    "predictions_1_proj": ssd_loss3.compute_loss,
    "predictions_2_proj": ssd_loss4.compute_loss,
}

# Every output contributes to the total loss with weight 1.0.
# Alternative (detection outputs only):
# lossWeights = {"predictions_1": 1.0,"predictions_2": 1.0}
lossWeights = dict.fromkeys(losses, 1.0)

# Metrics are only attached to the two detection outputs.
# Alternative (also track the projection outputs):
# MetricstDict = {"predictions_1": Accuracy,"predictions_2": Accuracy, "predictions_1_proj": Accuracy_Proj,"predictions_2_proj": Accuracy_Proj}
MetricstDict = {
    "predictions_1": Accuracy,
    "predictions_2": Accuracy,
}

model.compile(
    optimizer=adam,
    loss=losses,
    loss_weights=lossWeights,
    metrics=MetricstDict,
)
# model.compile(optimizer=adam, loss=losses, loss_weights=lossWeights) 


y_pred:  Tensor("predictions_1/concat:0", shape=(?, 17292, 18), dtype=float32)
y_true:  Tensor("predictions_1_target:0", shape=(?, ?, ?), dtype=float32)
y_pred:  Tensor("predictions_2/concat:0", shape=(?, 17292, 18), dtype=float32)
y_true:  Tensor("predictions_2_target:0", shape=(?, ?, ?), dtype=float32)


In [6]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 300, 600, 3)   0                                            
____________________________________________________________________________________________________
input_2 (InputLayer)             (None, 300, 600, 3)   0                                            
____________________________________________________________________________________________________
identity_layer__1 (Lambda)       (None, 300, 600, 3)   0           input_1[0][0]                    
____________________________________________________________________________________________________
identity_layer__2 (Lambda)       (None, 300, 600, 3)   0           input_2[0][0]                    
___________________________________________________________________________________________

### 2.2 Load a previously created model


In [7]:
# train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path='dataset_pascal_voc_07+12_trainval.h5')
# val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path='dataset_pascal_voc_07_test.h5')
# Create the data generator objects without an HDF5 dataset; the image and
# annotation lists are filled in by the `parse_xml()` calls below.
# (`val_dataset` used to be instantiated twice with identical arguments; the
# redundant second construction has been removed.)
train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

# NOTE(review): `val_dataset_1` is not used in the cells visible here; it is
# kept in case later code relies on it.
val_dataset_1 = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)


# The directory that contains the images.
VOC_2007_images_dir = '../datasets/Images/'

# The directory that contains the annotations.
VOC_2007_annotations_dir = '../datasets/VOC/Pasadena/Annotations_Multi/'

# The paths to the image sets (the small "few" splits).
VOC_2007_trainval_image_set_filename = (
    '../datasets/VOC/Pasadena/ImageSets/Main/reid_neu/'
    'train_few.txt'
)
VOC_2007_val_image_set_filename = (
    '../datasets/VOC/Pasadena/ImageSets/Main/reid_neu/'
    'val_few.txt'
)
VOC_2007_test_image_set_filename = (
    '../datasets/VOC/Pasadena/ImageSets/Main/reid_neu/'
    'test_few.txt'
)

# VOC_2007_trainval_image_set_filename = '../datasets/VOC/Pasadena/ImageSets/Main/reid_neu/train.txt'
# VOC_2007_val_image_set_filename      = '../datasets/VOC/Pasadena/ImageSets/Main/reid_neu/val.txt'
# VOC_2007_test_image_set_filename     = '../datasets/VOC/Pasadena/ImageSets/Main/reid_neu/test.txt'


# The paths to the image sets.
# VOC_2007_trainval_image_set_filename = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/trainval_sia.txt'
# VOC_2007_val_image_set_filename      = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/val_sia.txt'
# VOC_2007_test_image_set_filename     = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/test_sia.txt'

# VOC_2007_trainval_image_set_filename = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/trainval_sia_same.txt'
# VOC_2007_val_image_set_filename      = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/val_sia_same.txt'
# VOC_2007_test_image_set_filename     = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/test_sia_same.txt'

# VOC_2007_trainval_image_set_filename = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/trainval_sia_sub.txt'
# VOC_2007_val_image_set_filename      = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/val_sia_sub.txt'
# VOC_2007_test_image_set_filename     = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/test_sia_sub.txt'

# VOC_2007_trainval_image_set_filename = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/trainval_one.txt'
# VOC_2007_val_image_set_filename      = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/val_one.txt'
# VOC_2007_test_image_set_filename     = '../datasets/VOC/Pasadena/ImageSets/Main/siamese/test_one.txt'

# The XML parser needs to know what object class names to look for and in
# which order to map them to integers.
classes = ['background', 'tree']

# Parse the training image set.
train_dataset.parse_xml(
    images_dirs=[VOC_2007_images_dir],
    image_set_filenames=[VOC_2007_trainval_image_set_filename],
    annotations_dirs=[VOC_2007_annotations_dir],
    classes=classes,
    include_classes='all',
    exclude_truncated=False,
    exclude_difficult=False,
    ret=False,
)

# Parse the validation image set. Unlike for the training set,
# `exclude_difficult` is switched on here.
val_dataset.parse_xml(
    images_dirs=[VOC_2007_images_dir],
    image_set_filenames=[VOC_2007_val_image_set_filename],
    annotations_dirs=[VOC_2007_annotations_dir],
    classes=classes,
    include_classes='all',
    exclude_truncated=False,
    exclude_difficult=True,
    ret=False,
)



Processing image set 'train_few.txt': 100%|██████████| 4/4 [00:00<00:00, 23.12it/s]
Processing image set 'val_few.txt': 100%|██████████| 2/2 [00:00<00:00, 26.16it/s]


In [8]:
# Number of samples per generated batch.
batch_size = 4

# For the training generator: the augmentation chain, configured with the
# model input size and the dataset mean color as background.
ssd_data_augmentation = SSDDataAugmentation_modified(
    img_height=img_height,
    img_width=img_width,
    background=mean_color,
)

# For the validation generator: channel conversion followed by a resize to
# the model input size.
convert_to_3_channels = ConvertTo3Channels_Modified()
resize = Resize_Modified(height=img_height, width=img_width)

# 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

# The encoder constructor needs the spatial dimensions of the model's predictor
# layers to create the anchor boxes. They are read from the output shapes of
# the confidence predictor layers, in this order.
predictor_sizes = [
    model.get_layer(layer_name).output_shape[1:3]
    for layer_name in ('conv4_3_norm_mbox_conf__1',
                       'fc7_mbox_conf__1',
                       'conv6_2_mbox_conf__1',
                       'conv7_2_mbox_conf__1',
                       'conv8_2_mbox_conf__1',
                       'conv9_2_mbox_conf__1')
]

ssd_input_encoder = SSDInputEncoder(
    img_height=img_height,
    img_width=img_width,
    n_classes=n_classes,
    predictor_sizes=predictor_sizes,
    scales=scales,
    aspect_ratios_per_layer=aspect_ratios,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    clip_boxes=clip_boxes,
    variances=variances,
    matching_type='multi',
    pos_iou_threshold=0.5,
    neg_iou_limit=0.5,
    normalize_coords=normalize_coords,
)

# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.

# Training batches: augmented images plus encoded labels, not shuffled.
train_generator = train_dataset.generate(
    batch_size=batch_size,
    shuffle=False,
    transformations=[ssd_data_augmentation],
    label_encoder=ssd_input_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False,
)

# Validation batches: converted and resized images plus encoded labels,
# not shuffled.
val_generator = val_dataset.generate(
    batch_size=batch_size,
    shuffle=False,
    transformations=[convert_to_3_channels, resize],
    label_encoder=ssd_input_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False,
)

# Get the number of samples in the training and validation datasets.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size = val_dataset.get_dataset_size()

print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

Number of images in the training dataset:	     4
Number of images in the validation dataset:	     2


## 4. Set the remaining training parameters

We've already chosen an optimizer and set the batch size above, now let's set the remaining training parameters. I'll set one epoch to consist of 1,000 training steps. The next code cell defines a learning rate schedule that replicates the learning rate schedule of the original Caffe implementation for the training of the SSD300 Pascal VOC "07+12" model. That model was trained for 120,000 steps with a learning rate of 0.001 for the first 80,000 steps, 0.0001 for the next 20,000 steps, and 0.00001 for the last 20,000 steps. If you're training on a different dataset, define the learning rate schedule however you see fit.

I'll set only a few essential Keras callbacks below, feel free to add more callbacks if you want TensorBoard summaries or whatever. We obviously need the learning rate scheduler and we want to save the best models during the training. It also makes sense to continuously stream our training history to a CSV log file after every epoch, because if we didn't do that, in case the training terminates with an exception at some point or if the kernel of this Jupyter notebook dies for some reason or anything like that happens, we would lose the entire history for the trained epochs. Finally, we'll also add a callback that makes sure that the training terminates if the loss becomes `NaN`. Depending on the optimizer you use, it can happen that the loss becomes `NaN` during the first iterations of the training. In later iterations it's less of a risk. For example, I've never seen a `NaN` loss when I trained SSD using an Adam optimizer, but I've seen a `NaN` loss a couple of times during the very first couple of hundred training steps of training a new model when I used an SGD optimizer.

In [9]:
# Define a learning rate schedule.

def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    Epochs below 80 use 0.001, epochs below 100 use 0.0001, and everything
    else uses 0.00001.
    """
    if epoch < 80:
        return 0.001
    return 0.0001 if epoch < 100 else 0.00001

In [10]:

# Module-level settings read by `compute_loss()` below.

# Multiplier applied to the number of positives when deciding how many
# negative boxes to keep.
neg_pos_ratio = 3
# Lower bound for the number of negative boxes to keep.
n_neg_min = 0
# Weight of the localization loss in the total loss.
alpha = 1

def smooth_L1_loss(y_true, y_pred):
    """Smooth L1 loss, summed over the last axis.

    Per element, with `d = y_true - y_pred`: `0.5 * d**2` where `|d| < 1`,
    and `|d| - 0.5` elsewhere. The per-element values are then summed along
    the last axis.
    """
    residual = y_true - y_pred
    magnitude = tf.abs(residual)
    quadratic = 0.5 * residual**2
    linear = magnitude - 0.5
    per_element = tf.where(tf.less(magnitude, 1.0), quadratic, linear)
    return tf.reduce_sum(per_element, axis=-1)

def log_loss(y_true, y_pred):
    """Log loss, summed over the last axis.

    Predictions are floored at 1e-15 before the logarithm is taken; the result
    is `-sum(y_true * log(y_pred))` along the last axis.
    """
    # Keep the argument of the logarithm strictly positive.
    floored = tf.maximum(y_pred, 1e-15)
    weighted_log = y_true * tf.log(floored)
    return -tf.reduce_sum(weighted_log, axis=-1)


def compute_loss(y_true, y_pred):
    """Compute an SSD-style loss and return it together with matched tensors.

    `y_true` and `y_pred` are each split at channel 18 of the last axis into a
    first part (`[..., :18]`) and a second part (`[..., 18:]`).

    * The four parts are passed to the nested `matcher`, whose two outputs
      replace the local names `y_pred` and `y_true` and are returned unchanged.
    * The loss itself is computed only from the first parts: a log loss on
      channels `[:-16]`, a smooth L1 loss on channels `[-16:-12]`, positives
      taken from channels `[1:-16]`, negatives from channel 0, with the
      highest-loss negatives kept up to `neg_pos_ratio` times the number of
      positives (at least `n_neg_min`).

    `neg_pos_ratio`, `n_neg_min` and `alpha` are read from module level.

    Returns:
        A tuple `(total_loss, y_pred, y_true)` where `total_loss` is
        `(class_loss + alpha * loc_loss) / max(1, n_positive) * batch_size`
        and the other two entries are the outputs of `matcher`.

    NOTE(review): `matcher` calls `K.eval` and prints tensors, so this
    function presumably only works on inputs that can be evaluated right away
    (as in the `prediction_history` callback) - confirm.
    """
    # Local copies of the module-level helpers of the same names.
    def gt_rem(pred, gt):
        # NOTE(review): `val` is computed but never used.
        val = tf.subtract(tf.shape(pred)[1], tf.shape(gt)[1],name="gt_rem_subtract")
        # Keep one entry on axis 0, as many rows as `pred` has on axis 1, 18 channels.
        gt = tf.slice(gt, [0, 0, 0], [1, tf.shape(pred)[1], 18],name="rem_slice")
        return gt

    def gt_add(pred, gt):
        # Prepend the first `val` rows of `gt` to `gt` along axis 1, where
        # `val` is the difference between the axis-1 lengths of `pred` and `gt`.
        val = tf.subtract(tf.shape(pred)[1], tf.shape(gt)[1],name="gt_add_subtract")
        ext = tf.slice(gt, [0, 0, 0], [1, val, 18], name="add_slice")
        gt = K.concatenate([ext,gt], axis=1)
        return gt

    # Pass-through: returns its second argument and ignores the first.
    def equalalready(gt, pred): return pred

    def make_equal(pred, gt):
        # Shorten `gt` when `pred` has fewer rows on axis 1, otherwise lengthen it.
        equal_tensor = tf.cond(tf.shape(pred)[1] < tf.shape(gt)[1], lambda: gt_rem(pred, gt), lambda: gt_add(pred, gt), name="make_equal_cond")
        return equal_tensor


    # NOTE(review): the parameter order here is (true_1, true_2, pred_1, pred_2),
    # but the call further down passes (true_1, pred_1, true_2, pred_2). As
    # written, the parameter named `y_true_2` receives the caller's `y_pred_1`
    # and the parameter named `y_pred_1` receives the caller's `y_true_2` -
    # confirm which order is intended.
    def matcher(y_true_1,y_true_2,y_pred_1,y_pred_2, bsz):
        pred = 0
        gt = 0
        # Debug output.
        print("@@@: ",y_true_2[:,20:50,:])

        for i in range(bsz):
            
            # Indices of the rows whose fourth-from-last channel is not 99.
            filterer = tf.where(tf.not_equal(y_true_1[i,:,-4],99))
            filterer_2 = tf.where(tf.not_equal(y_true_2[i,:,-4],99))
            # Debug output.
            print("$$$: ",y_true_2[:,20:50,:])

            # Keep only the selected rows and restore a leading axis of size 1.
            y_true_new = tf.gather_nd(y_true_1[i,:,:],filterer)            
            y_true_new = tf.expand_dims(y_true_new, 0)
            
            y_true_2_new = tf.gather_nd(y_true_2[i,:,:],filterer_2)
            y_true_2_new = tf.expand_dims(y_true_2_new, 0)

            # NOTE(review): the leading axis of `y_true_new` / `y_true_2_new`
            # has size 1 after `expand_dims`, so indexing it with `i` looks
            # valid only for i == 0 (the function is called with bsz=1).
            set1 = y_true_new[i,:,-4]
            set2 = y_true_2_new[i,:,-4]
#             print(K.eval(y_true_2_new[i,20:50,:]))
            # Debug output; `K.eval` evaluates the tensors immediately.
            print(K.eval(set1))
            print(K.eval(set2))

#             s = tf.sets.set_intersection(set1[None,:], set2[None, :])
            
            # IoU between channels [-16:-12] of `y_pred_1` and of the filtered
            # `y_true_new`, followed by a greedy bipartite matching; both run
            # as Python functions through `tf.py_func`.
            iou_out = tf.py_func(iou, [y_pred_1[i,:,-16:-12],tf.convert_to_tensor(y_true_new[i,:,-16:-12])], tf.float64, name="iou_out")
            bipartite_matches = tf.py_func(match_bipartite_greedy, [iou_out], tf.int64, name="bipartite_matches")
            # Gather the matched rows of `y_pred_2`; wrapping the indices in a
            # list adds a leading axis to the result.
            out = tf.gather(y_pred_2[i,:,:], [bipartite_matches], axis=0, name="out")
            


            # If the gathered rows and the filtered `y_true_2_new` already have
            # the same axis-1 length, use `y_true_2_new` as is; otherwise
            # resize it with `make_equal`.
            box_comparer = tf.reduce_all(tf.equal(tf.shape(out)[1], tf.shape(y_true_2_new)[1]), name="box_comparer")
            y_true_2_equal = tf.cond(box_comparer, lambda: equalalready(out, y_true_2_new), lambda: make_equal(out, y_true_2_new), name="y_true_cond")

            # Accumulate the per-sample results.
            # NOTE(review): `pred` is concatenated along the last axis but `gt`
            # along axis 0 - confirm this asymmetry is intended.
            if i != 0:
                pred = K.concatenate([pred,out], axis=-1)
                gt = K.concatenate([gt,y_true_2_equal], axis=0)
            else:
                pred = out
                gt = y_true_2_equal    
        return pred, gt
    
    # Split targets and predictions at channel 18.
    y_true_1 = y_true[:,:,:18]
    y_pred_1 = y_pred[:,:,:18]
    y_true_2 = y_true[:,:,18:]
    y_pred_2 = y_pred[:,:,18:]
    # Debug output.
    print("###: ",y_true_2[:,20:50,:])

    # Rebinds `y_pred` / `y_true` to the matcher outputs; they are only
    # returned, not used in the loss below. The batch size is fixed to 1.
    y_pred, y_true = matcher(y_true_1,y_pred_1,y_true_2,y_pred_2,1)
    # The loss is computed from the first 18 channels only.
    y_pred1 = y_pred_1
    t_true1 = y_true_1

    batch_size = tf.shape(y_pred1)[0]
    n_boxes = tf.shape(t_true1)[1] 

    # 1: Per-box classification and localization losses.
    classification_loss = tf.to_float(log_loss(t_true1[:,:,:-16], y_pred1[:,:,:-16])) # Output shape: (batch_size, n_boxes)
    localization_loss = tf.to_float(smooth_L1_loss(t_true1[:,:,-16:-12], y_pred1[:,:,-16:-12])) # Output shape: (batch_size, n_boxes)

    # 2: Masks for negative (channel 0) and positive (channels 1:-16) boxes.
    negatives = t_true1[:,:,0] # Tensor of shape (batch_size, n_boxes)
    positives = tf.to_float(tf.reduce_max(t_true1[:,:,1:-16], axis=-1)) # Tensor of shape (batch_size, n_boxes)
    n_positive = tf.reduce_sum(positives)

    pos_class_loss = tf.reduce_sum(classification_loss * positives, axis=-1) # Tensor of shape (batch_size,)


    # 3: Keep only the negatives with the highest classification loss.
    neg_class_loss_all = classification_loss * negatives # Tensor of shape (batch_size, n_boxes)
    n_neg_losses = tf.count_nonzero(neg_class_loss_all, dtype=tf.int32) # The number of non-zero loss entries in `neg_class_loss_all`
    # At least `n_neg_min`, at most `neg_pos_ratio` times the positives, and
    # never more than the number of non-zero negative losses.
    n_negative_keep = tf.minimum(tf.maximum(neg_pos_ratio * tf.to_int32(n_positive), n_neg_min), n_neg_losses)

    # Branch taken when there is no non-zero negative loss at all.
    def f1():
        return tf.zeros([batch_size])
    # Branch taken otherwise: select the `n_negative_keep` largest losses.
    def f2():

        neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1]) # Tensor of shape (batch_size * n_boxes,)
        values, indices = tf.nn.top_k(neg_class_loss_all_1D,
                                      k=n_negative_keep,
                                      sorted=False) # We don't need them sorted.

        # Build a 0/1 mask that marks the selected indices...
        negatives_keep = tf.scatter_nd(indices=tf.expand_dims(indices, axis=1),
                                       updates=tf.ones_like(indices, dtype=tf.int32),
                                       shape=tf.shape(neg_class_loss_all_1D)) # Tensor of shape (batch_size * n_boxes,)
        negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes])) # Tensor of shape (batch_size, n_boxes)
        # ...and use it to keep only those boxes and mask all other classification losses
        neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1) # Tensor of shape (batch_size,)
        return neg_class_loss

    neg_class_loss = tf.cond(tf.equal(n_neg_losses, tf.constant(0)), f1, f2)

    class_loss = pos_class_loss + neg_class_loss # Tensor of shape (batch_size,)

    # Localization loss counts for positive boxes only.
    loc_loss = tf.reduce_sum(localization_loss * positives, axis=-1) # Tensor of shape (batch_size,)

    # 4: Compute the total loss.

    total_loss = (class_loss + alpha * loc_loss) / tf.maximum(1.0, n_positive) # In case `n_positive == 0`
    # Scale by the batch size.
    total_loss = total_loss * tf.to_float(batch_size)
    total_loss.set_shape((None,))
    return total_loss, y_pred, y_true


In [11]:
class prediction_history(Callback):
    """Callback that saves matched predictions and targets after every epoch.

    At the end of each epoch a fixed batch is loaded from
    'outputs/predder.npy', the output of the "predictions_1_proj" layer of the
    global `model` is computed for it, and the second and third return values
    of `compute_loss` are written to 'outputs/y_pred<epoch>.npy' and
    'outputs/y_true<epoch>.npy'.
    """

    def __init__(self):
        # Initialize the Keras `Callback` base class before anything else.
        super().__init__()
        print("Predictor")

    def on_epoch_end(self, epoch, logs=None):
        """Dump the matcher outputs for the stored batch.

        Args:
            epoch: Epoch index; used in the output file names.
            logs: Metrics dict supplied by Keras; not used here.
        """
        # The file is indexed below like a nested container holding a dict,
        # which presumably means it was saved as an object array; recent NumPy
        # versions only load those with `allow_pickle=True`. Only load files
        # you created yourself, since unpickling can execute arbitrary code.
        predder = np.load('outputs/predder.npy', allow_pickle=True)
        bX = predder[0][0]
        bZ = predder[0][1]
        gX = predder[0][2]
        gZ = predder[0][3]

        y_true = predder[1]['predictions_1_proj']

        # Only the "predictions_1_proj" output is consumed below. The
        # sub-models and forward passes for "predictions_1", "predictions_2"
        # and "predictions_2_proj" that used to be built here every epoch were
        # never read and have been removed.
        proj_model = Model(inputs=model.input,
                           outputs=model.get_layer("predictions_1_proj").output)
        proj_output = proj_model.predict([bX,bZ,gX,gZ])

        # The loss value itself is not needed, only the matched tensors.
        _, y_pred1, y_true1 = compute_loss(y_true, proj_output)

        np.save('outputs/y_pred'+str(epoch)+'.npy',K.eval(y_pred1))
        np.save('outputs/y_true'+str(epoch)+'.npy',K.eval(y_true1))


In [12]:
# Define model callbacks.

# Checkpoint writer, monitoring the validation loss.
# TODO: Set the filepath under which you want to save the model.
model_checkpoint = ModelCheckpoint(
    filepath='checkpoints/double_ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)

# TensorBoard summaries, written to './Graph'.
tbCallBack = TensorBoard(
    log_dir='./Graph',
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)

# Training history as CSV, appended to an existing file.
csv_logger = CSVLogger(
    filename='ssd300_pascal_07+12_training_log.csv',
    separator=',',
    append=True,
)

# Learning rate as defined by `lr_schedule()`.
learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)

# Early stopping on the validation loss with a patience of one epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=1,
    verbose=0,
    mode='auto',
)

terminate_on_nan = TerminateOnNaN()

# Per-epoch dump of matched predictions (prints "Predictor" when created).
printer_callback = prediction_history()

# The callbacks that are actually registered for training.
# `model_checkpoint` and `csv_logger` are created above but currently disabled:
callbacks = [
#    model_checkpoint,
#    csv_logger,
    learning_rate_scheduler,
    early_stopping,
    terminate_on_nan,
    printer_callback,
    tbCallBack,
]

Predictor


## 5. Train

In [13]:
# If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
initial_epoch = 0
final_epoch = 500
# Not passed to `fit_generator()` below: the number of steps per epoch is
# derived from the dataset size and the batch size instead.
steps_per_epoch = 1000

# Run the training. Steps per epoch and validation steps are the number of
# batches needed to cover the respective dataset once.
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=ceil(train_dataset_size/batch_size),
    epochs=final_epoch,
    callbacks=callbacks,
    verbose=1,
    validation_data=val_generator,
    validation_steps=ceil(val_dataset_size/batch_size),
    initial_epoch=initial_epoch,
)

Epoch 1/500
###:  [[[1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-02
   5.00000000e-02 1.00000000e-01 1.00000000e-01 1.00000000e-01
   2.00000000e-01 2.00000000e-01 9.90000000e+01 9.90000000e+01
   1.99000000e+02 1.99000000e+02]
  [1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-02
   7.07106781e-02 1.41421356e-01 1.00000000e-01 1.00000000e-01
   2.00000000e-01 2.00000000e-01 9.90000000e+01 9.90000000e+01
   1.99000000e+02 1.99000000e+02]
  [1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-02
   7.07106781e-02 7.07106781e-02 1.00000000e-01 1.00000000e-01
   2.00000000e-01 2.00000000e-01 9.90000000e+01 9.90000000e+01
   1.99000000e+02 1.99000000e+02]
  [1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-0

[100055. 100055. 100055. 100055. 100055. 100055. 100055. 100055.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 

Epoch 2/500
###:  [[[1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-02
   5.00000000e-02 1.00000000e-01 1.00000000e-01 1.00000000e-01
   2.00000000e-01 2.00000000e-01 9.90000000e+01 9.90000000e+01
   1.99000000e+02 1.99000000e+02]
  [1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-02
   7.07106781e-02 1.41421356e-01 1.00000000e-01 1.00000000e-01
   2.00000000e-01 2.00000000e-01 9.90000000e+01 9.90000000e+01
   1.99000000e+02 1.99000000e+02]
  [1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-02
   7.07106781e-02 7.07106781e-02 1.00000000e-01 1.00000000e-01
   2.00000000e-01 2.00000000e-01 9.90000000e+01 9.90000000e+01
   1.99000000e+02 1.99000000e+02]
  [1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
   0.00000000e+00 0.00000000e+00 7.33333333e-02 1.33333333e-0

[100055. 100055. 100055. 100055. 100055. 100055. 100055. 100055.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 

KeyboardInterrupt: 