In [None]:
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, CSVLogger
from tensorflow.keras import backend as K
from math import ceil

from model import SSD
from loss import Loss

from coder import InputEncoder

from data import DataGenerator, Resize, ConvertTo3Channels, DataAugmentation

%matplotlib inline

In [None]:
# --- Input image geometry -------------------------------------------------
image_height, image_width, image_channels = 500, 500, 3

# Per-channel colour passed to the model as `subtract_mean` and to the
# augmentation chain as `background`.
mean_color = [133, 100, 101]
# Channel permutation handed to the model as `swap_channels`
# (presumably RGB <-> BGR reordering -- confirm against the model module).
swap_channels = [2, 1, 0]

n_classes = 2

# --- Anchor box layout ----------------------------------------------------
# Seven scale values for six predictor layers.
# NOTE(review): presumably the extra trailing value is only needed because
# `two_boxes_for_ar1` is enabled -- confirm against the model/encoder code.
scales = [0.11, 0.22, 0.55, 0.66, 0.77, 0.88, 1.01]

# One list of aspect ratios per predictor layer. Every layer gets the three
# base ratios; layers 2-4 additionally get 3 and 1/3. Each inner list is a
# distinct object.
aspect_ratios = [
    [1.0, 2.0, 0.5] + extra_ratios
    for extra_ratios in ([],
                         [3.0, 1.0/3.0],
                         [3.0, 1.0/3.0],
                         [3.0, 1.0/3.0],
                         [],
                         [])
]
two_boxes_for_ar1 = True

# NOTE(review): these step values are unusually small for a 500x500 input if
# they are meant to be anchor strides in pixels -- verify against the
# feature-map sizes of the predictor layers.
steps = [1, 2, 4, 6, 16, 32]
# One offset per predictor layer.
offsets = [0.5] * len(aspect_ratios)
variances = [0.1, 0.1, 0.2, 0.2]

In [None]:
# Reset the global Keras state so that re-running this cell builds the model
# from scratch instead of adding to whatever an earlier run left behind.
K.clear_session()

# Build the SSD network in training mode from the configuration cell's values.
model = SSD(image_size=(image_height, image_width, image_channels),
            n_classes=n_classes,
            mode='training',
            l2_regularization=0.0005,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=False,
            variances=variances,
            subtract_mean=mean_color,
            swap_channels=swap_channels)

# Weights file loaded by layer name: only layers whose names match entries in
# the file receive weights, all other layers keep their initialisation.
VGG16 = 'VGG16.h5'
model.load_weights(VGG16, by_name=True)

# `learning_rate` replaces the deprecated `lr` alias, and the `decay=0.0`
# argument is dropped: 0.0 was the default (no decay), and current tf.keras
# optimizers reject the `decay` keyword outright. The per-epoch learning rate
# is set by the LearningRateScheduler callback during training anyway.
sgd = SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

_loss = Loss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=sgd, loss=_loss.compute_loss)

In [None]:
# Data generators backed by HDF5 files; images are not loaded into memory.
train_dataset = DataGenerator(
    load_images_into_memory=False,
    hdf5_dataset_path="train.h5",
)
val_dataset = DataGenerator(
    load_images_into_memory=False,
    hdf5_dataset_path="val.h5",
)

# Explicitly load both HDF5 datasets before querying their sizes.
train_dataset.load_hdf5_dataset()
val_dataset.load_hdf5_dataset()

# Number of samples in each split. The validation size determines
# `validation_steps` in the training cell.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size = val_dataset.get_dataset_size()

In [None]:
batch_size = 32

# Training-time transformation: augmentation configured with the target image
# size and the mean colour as background.
_data_augmentation = DataAugmentation(
    image_height=image_height,
    image_width=image_width,
    background=mean_color,
)

# Validation-time transformations: no augmentation, only channel conversion
# and resizing to the network input size.
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=image_height, width=image_width)

# Spatial size (elements 1 and 2 of `output_shape`) of each confidence
# predictor layer, in the order the layers are listed here.
predictor_sizes = [
    model.get_layer(layer_name).output_shape[1:3]
    for layer_name in (
        'conv4_3_norm_mbox_conf',
        'fc7_mbox_conf',
        'conv6_2_mbox_conf',
        'conv7_2_mbox_conf',
        'conv8_2_mbox_conf',
        'conv9_2_mbox_conf',
    )
]

# Label encoder built from the same anchor-box configuration as the model,
# plus the IoU thresholds used for matching.
_input_encoder = InputEncoder(
    image_height=image_height,
    image_width=image_width,
    n_classes=n_classes,
    predictor_sizes=predictor_sizes,
    scales=scales,
    aspect_ratios_per_layer=aspect_ratios,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    clip_boxes=False,
    variances=variances,
    matching_type='multi',
    pos_iou_threshold=0.5,
    neg_iou_limit=0.5,
)

# Shuffled, augmented batches for training.
train_generator = train_dataset.generate(
    batch_size=batch_size,
    shuffle=True,
    transformations=[_data_augmentation],
    label_encoder=_input_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False,
)

# Unshuffled batches for validation, with only conversion and resizing applied.
val_generator = val_dataset.generate(
    batch_size=batch_size,
    shuffle=False,
    transformations=[convert_to_3_channels, resize],
    label_encoder=_input_encoder,
    returns={'processed_images', 'encoded_labels'},
    keep_images_without_gt=False,
)

In [None]:
def schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    Epochs below 10 train at 0.001, epochs below 50 at 0.0001, and all
    later epochs at 0.00001.
    """
    # (exclusive upper epoch bound, learning rate), checked in order.
    for upper_bound, rate in ((10, 0.001), (50, 0.0001)):
        if epoch < upper_bound:
            return rate
    return 0.00001

# Sets the learning rate at the start of every epoch from `schedule`.
learning_rate_scheduler = LearningRateScheduler(schedule, verbose=1)

# Saves the full model (not just the weights) at epoch granularity, and only
# when the monitored quantity improves.
# NOTE(review): `monitor='loss'` tracks the *training* loss even though the
# file name also records val_loss -- confirm this is intended.
model_checkpoint = ModelCheckpoint(
    '-ep{epoch:02d}_loss-{loss:.5f}_val_loss-{val_loss:.5f}.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch',
)

# Appends per-epoch metrics to log.csv; an existing file is extended rather
# than overwritten.
csv_logger = CSVLogger('log.csv', separator=',', append=True)

callbacks = [
    learning_rate_scheduler,
    model_checkpoint,
    csv_logger,
]

In [None]:
# Train the model. `Model.fit` accepts Python generators directly, and
# `fit_generator` is deprecated and removed in current Keras releases, so
# `fit` is called with the same arguments.
# NOTE(review): `steps_per_epoch` is fixed at 120 rather than derived from
# `train_dataset_size` -- confirm this epoch length is intended.
history = model.fit(x=train_generator,
                    steps_per_epoch=120,
                    epochs=500,
                    callbacks=callbacks,
                    validation_data=val_generator,
                    # Enough steps to cover the whole validation set once.
                    validation_steps=ceil(val_dataset_size / batch_size),
                    initial_epoch=0)