In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# project source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Display the directory the kernel was started in.
os.getcwd()

'/data/aqvs/notebooks'

In [3]:
import sys, glob, shutil
# Move from the notebook folder up to the project root so that `src` and
# `config` can be imported as top-level packages.
# The guard keeps this cell idempotent: once `src/` is visible from the current
# directory, re-running the cell must not climb another level up.
if not os.path.isdir('src'):
    os.chdir(os.path.dirname(os.getcwd()))
os.getcwd()

'/data/aqvs'

#### Add the "src/networks" folder to sys.path so the network files can be imported in-line via importlib

In [4]:
import os, sys
# Reduce TensorFlow's native log output; set here, before TensorFlow is
# imported in a later cell.
os.environ['TF_CPP_MIN_LOG_LEVEL']='3'
# Register the networks folder only once, so re-running this cell does not
# pile up duplicate sys.path entries.
networks_path = os.path.abspath('./src/networks')
if networks_path not in sys.path:
    sys.path.append(networks_path)

In [5]:
#os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # optional: restrict to one GPU (environment values must be strings)

#### To handle OOM errors: cap the per-process GPU memory fraction and let allocation grow on demand

In [6]:
import tensorflow as tf
from keras import backend as K
import keras.backend.tensorflow_backend as ktf
def get_session():
    """ Build a TensorFlow session with restricted GPU memory options.

    The GPU options set a per-process memory fraction of 0.8 and enable
    `allow_growth`.

    Returns
        A tf.Session created from a ConfigProto carrying those GPU options.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.8,
            allow_growth=True,
        )
    )
    return tf.Session(config=session_config)
# Install the session built by get_session() as the Keras TensorFlow backend session.
ktf.set_session(get_session())

Using TensorFlow backend.


In [7]:
from config import retinanet as config

## Step 1: Create the backbone

In [8]:
from src import networks

In [9]:
# create object that stores backbone information for the architecture named in config["backbone"];
# later cells use its `preprocess_image`, `retinanet` and `download_imagenet` members
backbone = networks.backbone(config["backbone"])

## Step 2: Create the data generators

In [10]:
from src.training.csv_generator import CSVGenerator
from src.utils.transform import random_transform_generator

In [11]:
def create_generators(config, preprocess_image):
    """ Build the training generator and, when configured, the validation generator.

    Args
        config           : Config dict; reads "batch-size", "image-min-side", "image-max-side",
                           "random-transform", "annotations", "classes" and "val-annotations".
        preprocess_image : Function that preprocesses an image for the network.

    Returns
        train_generator      : CSVGenerator over config["annotations"], created with a transform generator.
        validation_generator : CSVGenerator over config["val-annotations"] created without a transform
                               generator, or None when config["val-annotations"] is falsy.
    """
    # keyword arguments shared by the training and validation generators
    shared_kwargs = dict(
        batch_size=config["batch-size"],
        image_min_side=config["image-min-side"],
        image_max_side=config["image-max-side"],
        preprocess_image=preprocess_image,
    )

    # augmentation for training data: the full set of random transforms when
    # enabled, otherwise only a random x-flip
    if config["random-transform"]:
        augmentation = dict(
            min_rotation=-0.1,
            max_rotation=0.1,
            min_translation=(-0.1, -0.1),
            max_translation=(0.1, 0.1),
            min_shear=-0.1,
            max_shear=0.1,
            min_scaling=(0.9, 0.9),
            max_scaling=(1.1, 1.1),
            flip_x_chance=0.5,
            flip_y_chance=0.5,
        )
    else:
        augmentation = dict(flip_x_chance=0.5)
    transform_generator = random_transform_generator(**augmentation)

    train_generator = CSVGenerator(
        config["annotations"],
        config["classes"],
        transform_generator=transform_generator,
        **shared_kwargs
    )

    # validation is optional
    val_annotations = config["val-annotations"]
    if not val_annotations:
        return train_generator, None

    validation_generator = CSVGenerator(val_annotations, config["classes"], **shared_kwargs)
    return train_generator, validation_generator

In [12]:
# create the generators
# (validation_generator is None when config["val-annotations"] is falsy)
train_generator, validation_generator = create_generators(config, backbone.preprocess_image)

## Step 3: Create the models

In [13]:
import keras

In [14]:
from src.training import losses
from src.networks.retinanet import retinanet_bbox
from src.utils.model import freeze as freeze_model

In [15]:
def model_with_weights(model, weights, skip_mismatch):
    """ Optionally load weights into a model and hand the model back.

    Args
        model         : The model to load weights for.
        weights       : The weights to load; None leaves the model untouched.
        skip_mismatch : If True, skips layers whose shape of weights doesn't match with the model.

    Returns
        The same model object that was passed in.
    """
    if weights is None:
        return model

    # weights are loaded by layer name
    model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch)
    return model

In [16]:
def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0,
                  freeze_backbone=False, lr=1e-5, config=None):
    """ Creates three models (model, training_model, prediction_model).

    Args
        backbone_retinanet : A function to call to create a retinanet model with a given backbone.
        num_classes        : The number of classes to train.
        weights            : The weights to load into the model.
        multi_gpu          : The number of GPUs to use for training.
        freeze_backbone    : If True, disables learning for the backbone.
        lr                 : Learning rate passed to the Adam optimizer.
        config             : Config parameters, None indicates the default configuration.

    Returns
        model            : The base model. This is also the model that is saved in snapshots.
        training_model   : The training model. Unless multi_gpu > 1, this is identical to model.
        prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
    """

    modifier = freeze_model if freeze_backbone else None

    # load anchor parameters, or pass None (so that defaults will be used)
    anchor_params = None
    num_anchors   = None
    if config and 'anchor_parameters' in config:
        # NOTE(review): parse_anchor_parameters is not imported in any cell visible
        # in this notebook, so this branch would raise NameError as written;
        # confirm where it lives and import it before passing a config here.
        anchor_params = parse_anchor_parameters(config)
        num_anchors   = anchor_params.num_anchors()

    def build_model():
        # single construction path shared by the single- and multi-GPU branches
        return model_with_weights(
            backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier),
            weights=weights,
            skip_mismatch=True
        )

    # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors.
    # optionally wrap in a parallel model
    if multi_gpu > 1:
        from keras.utils import multi_gpu_model
        with tf.device('/cpu:0'):
            model = build_model()
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model          = build_model()
        training_model = model

    # make prediction model
    prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)

    # compile model
    # Use the canonical `Adam` class name: the lowercase `adam` alias is not
    # available in newer Keras releases, while `Adam` exists in all of them.
    training_model.compile(
        loss={
            'regression'    : losses.smooth_l1(),
            'classification': losses.focal()
        },
        optimizer=keras.optimizers.Adam(lr=lr, clipnorm=0.001)
    )

    return model, training_model, prediction_model

In [17]:
# create the model
if config["resume-training"]:
    print('Loading model, this may take a second...')
    # Load the snapshot through `networks`, the package imported in an earlier
    # cell; no name `models` is imported anywhere in this notebook.
    # NOTE(review): assumes src.networks exposes load_model() alongside backbone() — confirm.
    model = networks.load_model(config["snapshot"], backbone_name=config["backbone"])
    training_model   = model
    anchor_params    = None
    prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)
else:
    weights = config["weights"]

    # default to imagenet if nothing else is specified
    if weights is None and config["imagenet_weights"]:
        weights = backbone.download_imagenet()

    print('Creating model, this may take a second...')
    model, training_model, prediction_model = create_models(backbone_retinanet=backbone.retinanet,
                                                            num_classes=train_generator.num_classes(),
                                                            weights=weights,
                                                            multi_gpu=config["multi-gpu"],
                                                            freeze_backbone=config["freeze-backbone"],
                                                            lr=config["lr"])

# print model summary
# (summary() prints the table itself and returns None, so it is not wrapped in print())
model.summary()

Creating model, this may take a second...
Instructions for updating:
Colocations handled automatically by placer.


  weight_values[i].shape))
  weight_values[i].shape))


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9408        input_1[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
conv1_relu (Activation)         (None, None, None, 6 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
pool1 (Max

## Step 4: Set up backbone shape computation for the generators

In [18]:
from src.utils.anchors import make_shapes_callback

In [19]:
# For VGG and DenseNet backbones, hand the generators a callback built from the
# actual backbone model so they can compute the backbone layer shapes.
if any(family in config["backbone"] for family in ('vgg', 'densenet')):
    train_generator.compute_shapes = make_shapes_callback(model)
    # the validation generator (when present) reuses the training generator's callback
    if validation_generator:
        validation_generator.compute_shapes = train_generator.compute_shapes

## Step 5: Create the training callbacks

In [20]:
from src.training.callbacks import RedirectModel
from src.training.callbacks.eval import Evaluate

In [21]:
def makedirs(path):
    """ Create a directory (including parents), tolerating one that already exists.

    Written without `exist_ok` so it behaves the same on Python 2.7 and 3.n.

    Args
        path : Directory path to create.

    Raises
        OSError : If creation fails and `path` is not an existing directory.
    """
    try:
        os.makedirs(path)
    except OSError:
        # an already-existing directory is fine; anything else is a real failure
        if os.path.isdir(path):
            return
        raise

In [22]:
def create_callbacks(model, training_model, prediction_model, validation_generator, config):
    """ Assemble the callbacks to use during training.

    The returned list is ordered: TensorBoard (optional), evaluation (optional),
    snapshot checkpoint, learning-rate reduction.

    Args
        model                : The base model; snapshots are redirected to it.
        training_model       : The model that is used for training (not used by this function).
        prediction_model     : The model that should be used for validation.
        validation_generator : The generator for creating validation data; a falsy value skips evaluation.
        config               : Config dict; reads "tensorboard-dir", "batch-size", "weighted-average",
                               "snapshot-path" and "backbone".

    Returns:
        A list of callbacks used for training.
    """
    callbacks = []

    # optional TensorBoard logging; the callback is also handed to the evaluation callback
    tensorboard_callback = None
    tensorboard_dir = config["tensorboard-dir"]
    if tensorboard_dir:
        tensorboard_callback = keras.callbacks.TensorBoard(
            log_dir=tensorboard_dir,
            histogram_freq=0,
            batch_size=config["batch-size"],
            write_graph=True,
            write_grads=False,
            write_images=False,
            embeddings_freq=0,
            embeddings_layer_names=None,
            embeddings_metadata=None,
        )
        callbacks.append(tensorboard_callback)

    # evaluation is redirected to the prediction model
    if validation_generator:
        evaluator = Evaluate(validation_generator, tensorboard=tensorboard_callback, weighted_average=config["weighted-average"])
        callbacks.append(RedirectModel(evaluator, prediction_model))

    # save the model
    # ensure directory created first; otherwise h5py will error after epoch.
    snapshot_dir = config["snapshot-path"]
    makedirs(snapshot_dir)
    snapshot_name = '{backbone}_{{epoch:02d}}.h5'.format(backbone=config["backbone"])
    # save_best_only / monitor / mode are not set on the checkpoint
    checkpoint = keras.callbacks.ModelCheckpoint(os.path.join(snapshot_dir, snapshot_name), verbose=1)
    callbacks.append(RedirectModel(checkpoint, model))

    # lower the learning rate when the training loss stops improving
    lr_reducer = keras.callbacks.ReduceLROnPlateau(
        monitor='loss',
        factor=0.1,
        patience=2,
        verbose=1,
        mode='auto',
        min_delta=0.0001,
        cooldown=0,
        min_lr=0,
    )
    callbacks.append(lr_reducer)

    return callbacks

In [23]:
# create the callbacks
# (training_model is accepted by create_callbacks but its body does not use it)
callbacks = create_callbacks(
    model,
    training_model,
    prediction_model,
    validation_generator,
    config,
)

## Step 6: Train the model

In [24]:
# Multiprocessing is switched on exactly when at least one worker is configured
use_multiprocessing = bool(config["workers"] > 0)

# Without "compute-val-loss", no validation data is passed to fit_generator.
# The callbacks created in the earlier cell were already handed the generator.
if not config["compute-val-loss"]:
    validation_generator = None

In [None]:
# Train the model: steps per epoch and epoch count come from config["steps"] and config["epochs"].
# validation_data is None unless config["compute-val-loss"] is truthy (see the previous cell).
training_model.fit_generator(generator=train_generator,
                             steps_per_epoch=config["steps"],
                             epochs=config["epochs"],
                             verbose=1,
                             callbacks=callbacks,
                             workers=config["workers"],
                             use_multiprocessing=use_multiprocessing,
                             max_queue_size=config["max_queue_size"],
                             validation_data=validation_generator)