In [1]:
import cv2
import numpy as np
import random
import h5py as h5
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
import sys

# Training the F-RCNN

## Importing auxiliary functions

In [2]:
sys.path.append("..") # TODO: fix

In [3]:
from training.read_dataset import read_dataset
from training.create_anchors import create_anchors
from training.calculate_bbox_intesect_over_union import calculate_bbox_intesect_over_union
from training.evaluate_ious import evaluate_ious
from training.create_samples_for_training import create_samples_for_training
from training.parametrize_anchor_box_properties import parametrize_anchor_box_properties
from training.losses import loss_cls, loss_reg

## Defining parameters

In [4]:
N_RATIO_LOSSES = 10.0
N_VALIDATION_DATA = 10
N_TEST_DATA = 5
N_DATA_EPOCHS = 10
N_EPOCHS = 100
N_PATIENCE = 200
RANDOM_SEED = 13

IMG_SIZE = (500, 500)
MODE = "mask"

DEBUG = False

N_SUB = 8
ANCHOR_REAL_SIZE = [16, 24, 32, 48, 64]
POS_IOU_THRESHOLD = 0.50
NEG_IOU_THRESHOLD = 0.1
N_FILTERS = 16
KERNEL_SIZE = 5

ANCHOR_RATIOS = [0.5, 0.8, 1.0, 1.1]

SHOW_N_POS = False
POS_RATIO = 0.5
N_SAMPLES = 30

ADAM_LEARNING_RATE = 3.0e-4

In [5]:
# Loading image size from config file
img_size = IMG_SIZE

# Subscaling/Anchor values (2^n), ex: 1, 2, 4, 8, 16, 32
N_SUB = 2

# Defining anchor sizes
ANCHOR_SIZES = np.array(ANCHOR_REAL_SIZE) // N_SUB

# Defining number of anchors sized and rations
N_ANCHORS =  len(ANCHOR_SIZES)
N_RATIOS  =  len(ANCHOR_RATIOS)

# Defining the dataset folder
dataset_folder = "dataset_test"
imgs, bbox_datasets = read_dataset(img_size, dataset_folder)

# Shuffling dataset - random seed is 13
N_dataset = imgs.shape[0]
array_for_shuffling = np.arange(N_dataset, dtype=int)
random.Random(13).shuffle(array_for_shuffling)

# Shuffling images
imgs = imgs[array_for_shuffling]

# Shuffling bboxes
bbox_datasets_new = []
for new_index in array_for_shuffling:
    bbox_datasets_new.append(bbox_datasets[new_index])
bbox_datasets = bbox_datasets_new

# Defining which image is used during training
if MODE == "mask":
    imgs = imgs[:,:,:,0]
elif MODE == "raw":
    imgs = imgs[:,:,:,1]

Reading dataset...
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27


In [6]:
input_image = Input(shape=(img_size[0],img_size[1],1))
conv_3_3_1 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-1"
    )(input_image)
max_pool_1 = MaxPooling2D((2,2),
                          name="max_pool_1")(conv_3_3_1)

conv_3_3_2 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-2"
)(max_pool_1)


max_pool_2 = MaxPooling2D((2,2),
                          name="max_pool_2")(conv_3_3_2)

conv_3_3_3 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-3"
)(max_pool_2)

max_pool_3 = MaxPooling2D((2,2),
                          name="max_pool_3")(conv_3_3_3)

conv_3_3_4 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-4"
)(max_pool_3)

max_pool_4 = MaxPooling2D((2,2),
                          name="max_pool_4")(conv_3_3_4)

conv_3_3_5 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-5"
)(max_pool_4)

max_pool_5 = MaxPooling2D((2,2),
                          name="max_pool_5")(conv_3_3_5)

conv_3_3_6 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-6"
)(max_pool_5)

max_pool_6 = MaxPooling2D((2,2),
                          name="max_pool_6")(conv_3_3_6)

In [7]:
if N_SUB == 1:
    last_layer = conv_3_3_1
elif N_SUB == 2:
    last_layer = max_pool_1
elif N_SUB == 4:
    last_layer = max_pool_2
elif N_SUB == 8:
    last_layer = max_pool_3
elif N_SUB == 16:
    last_layer = max_pool_4
elif N_SUB == 32:
    last_layer = max_pool_5

In [8]:
output_scores = Conv2D(
    filters=N_ANCHORS * N_RATIOS,
    kernel_size=(1, 1),
    activation="sigmoid",
    kernel_initializer="uniform",
    name="l_reg"
)(last_layer)

output_regressor = Conv2D(
    filters=N_ANCHORS * N_RATIOS * 4,
    kernel_size=(1, 1),
    activation="linear",
    kernel_initializer="uniform",
    name="bb_reg"
)(last_layer)

opt = Adam(learning_rate=ADAM_LEARNING_RATE)
model = Model(inputs=[input_image], outputs=[output_scores, output_regressor])
model.compile(optimizer=opt, loss={'l_reg':loss_cls, 'bb_reg':loss_reg})


plot_model(model, show_shapes=True, to_file="model_true.png")
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 500, 500, 1) 0                                            
__________________________________________________________________________________________________
3x3-1 (Conv2D)                  (None, 500, 500, 16) 416         input_1[0][0]                    
__________________________________________________________________________________________________
max_pool_1 (MaxPooling2D)       (None, 250, 250, 16) 0           3x3-1[0][0]                      
__________________________________________________________________________________________________
l_reg (Conv2D)                  (None, 250, 250, 20) 340         max_pool_1[0][0]                 
_______________________________________________________________________________________

In [9]:
def generate_validation_data(imgs, bbox_datasets, img_size, N_SUB, N_ANCHORS, ANCHOR_SIZES, ANCHOR_RATIOS, N_RATIOS,
                             POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, DEBUG, POS_RATIO, N_SAMPLES, SHOW_N_POS):

    # Creating the anchors
    anchors, index_anchors_valid = create_anchors(img_size, N_SUB, ANCHOR_RATIOS, ANCHOR_SIZES, imgs[0])

    # Number of validation images
    N_validation = imgs.shape[0]

    # Initializing the arrays
    batch_imgs             = np.zeros((N_validation, img_size[0], img_size[1], 1), dtype=np.float64)
    batch_anchor_labels    = np.zeros((N_validation, img_size[0] // N_SUB, img_size[1] // N_SUB, N_ANCHORS * N_RATIOS),     dtype=np.float64)
    batch_anchor_locations = np.zeros((N_validation, img_size[0] // N_SUB, img_size[1] // N_SUB, 4 * N_ANCHORS * N_RATIOS), dtype=np.float64)

    index = 0
    for img, bbox_dataset in zip(imgs, bbox_datasets):

        # Calculating anchor/bbox_dataset IoUs
        ious = calculate_bbox_intesect_over_union(anchors, index_anchors_valid, bbox_dataset, img)

        # Evaluating if the anchors are valid or invalid based on the IoUs
        labels, anchor_argmax_ious = evaluate_ious(anchors, index_anchors_valid, ious, bbox_dataset, img, POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, debug=DEBUG)

        # Creating the samples for training
        anchor_labels = create_samples_for_training(anchors, index_anchors_valid, anchor_argmax_ious, labels, ious, bbox_dataset, img,
                                                    POS_RATIO, N_SAMPLES, SHOW_N_POS, debug=DEBUG)

        # Reshaping the anchor labels to follow the image/sub-image coordinates
        anchor_labels = np.reshape(anchor_labels, (img_size[0] // N_SUB, img_size[1] // N_SUB, N_ANCHORS * N_RATIOS))

        # Parametrizing the anchor box properties
        anchor_locations = parametrize_anchor_box_properties(anchors, anchor_argmax_ious, labels, ious, bbox_dataset, img)

        # Reshaping the anchor locations to follow the image/sub-image coordinates
        anchor_locations = np.reshape(anchor_locations, (img_size[0] // N_SUB, img_size[1] // N_SUB, 4 * N_ANCHORS * N_RATIOS))

        # Converting to float
        anchor_labels = anchor_labels.astype(np.float64)

        # Storing images
        batch_imgs[index, :, :, 0] = img

        # Updating anchor labels and properties(locations)
        batch_anchor_labels[index,:,:,:] = anchor_labels
        batch_anchor_locations[index,:,:,:] = anchor_locations

        index+=1
        print(index)

    # Returning samples for model validation
    return batch_imgs, [batch_anchor_labels, batch_anchor_locations]

def input_generator(imgs, bbox_datasets, img_size, N_SUB, ANCHOR_RATIOS, ANCHOR_SIZES, N_DATA_EPOCHS, N_ANCHORS, N_RATIOS,
                    POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, DEBUG, POS_RATIO, N_SAMPLES, SHOW_N_POS):

    # Creating the anchors
    anchors, index_anchors_valid = create_anchors(img_size, N_SUB, ANCHOR_RATIOS, ANCHOR_SIZES, imgs[0])


    while True:
        # Picking a random number of images for training
        random_indexes = np.random.randint(low=0, high=len(imgs)-1, size=N_DATA_EPOCHS)
        # Initializing the arrays
        batch_imgs             = np.zeros((len(random_indexes), img_size[0], img_size[1], 1), dtype=np.float64)
        batch_anchor_labels    = np.zeros((len(random_indexes), img_size[0] // N_SUB, img_size[1] // N_SUB, N_ANCHORS * N_RATIOS),     dtype=np.float64)
        batch_anchor_locations = np.zeros((len(random_indexes), img_size[0] // N_SUB, img_size[1] // N_SUB, 4 * N_ANCHORS * N_RATIOS), dtype=np.float64)

        # Looping over the selected indexes and generating the input dataset
        for k, random_index in enumerate(random_indexes):

            # Retriegving the image and the bbox-values
            img = imgs[random_index]
            bbox_dataset = bbox_datasets[random_index]

            # Calculating anchor/bbox_dataset IoUs
            ious = calculate_bbox_intesect_over_union(anchors, index_anchors_valid, bbox_dataset, img)

            # Evaluating if the anchors are valid or invalid based on the IoUs
            labels, anchor_argmax_ious = evaluate_ious(anchors, index_anchors_valid, ious, bbox_dataset, img, POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, debug=DEBUG)

            # Creating the samples for training

            anchor_labels = create_samples_for_training(anchors, index_anchors_valid, anchor_argmax_ious, labels, ious, bbox_dataset, img,
                                                        POS_RATIO, N_SAMPLES, SHOW_N_POS, debug=False)

            # Reshaping the anchor labels to follow the image/sub-image coordinates
            anchor_labels = np.reshape(anchor_labels, (img_size[0] // N_SUB, img_size[1] // N_SUB, N_ANCHORS * N_RATIOS))

            # Parametrizing the anchor box properties
            anchor_locations = parametrize_anchor_box_properties(anchors, anchor_argmax_ious, labels, ious, bbox_dataset, img)

            # Reshaping the anchor locations to follow the image/sub-image coordinates
            anchor_locations = np.reshape(anchor_locations, (img_size[0] // N_SUB, img_size[1] // N_SUB, 4 * N_ANCHORS * N_RATIOS))

            # Converting to float
            anchor_labels = anchor_labels.astype(np.float64)

            # Storing images
            batch_imgs[k, :, :, 0] = img

            print(random_indexes)

            # Updating anchor labels and properties(locations)
            batch_anchor_labels[k,:,:,:] = anchor_labels
            batch_anchor_locations[k,:,:,:] = anchor_locations

            # Returning/Yielding samples for model training
            yield batch_imgs, (batch_anchor_labels, batch_anchor_locations)

In [10]:
# Defining the best fRCNN model name
best_model_name = "best_fRCNN_%s_%02d.keras" % (MODE, N_SUB)

# Model checkpoint for saving best models
checkpoint = ModelCheckpoint(best_model_name,
                              verbose=1,
                              save_best_only=True,
                              monitor='val_loss',
                              mode='auto')

# Model checkpoint for early stopping
early_stopping = EarlyStopping(monitor='val_loss',
                               mode='min',
                               verbose=1,
                               patience=N_PATIENCE)


validation_data = generate_validation_data(imgs[:N_VALIDATION_DATA], bbox_datasets[:N_VALIDATION_DATA], IMG_SIZE, N_SUB, N_ANCHORS, ANCHOR_SIZES, ANCHOR_RATIOS, N_RATIOS,
                             POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, DEBUG, POS_RATIO, N_SAMPLES, SHOW_N_POS)

model.fit(input_generator(imgs[N_VALIDATION_DATA:], bbox_datasets[N_VALIDATION_DATA:], IMG_SIZE, N_SUB, ANCHOR_RATIOS, ANCHOR_SIZES, N_DATA_EPOCHS, N_ANCHORS, N_RATIOS,
                    POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, DEBUG, POS_RATIO, N_SAMPLES, SHOW_N_POS),
          steps_per_epoch=10,
          epochs=N_EPOCHS,
          callbacks=[checkpoint, early_stopping],
          validation_data = validation_data)

1
2
3
4
5
6
7
8
9
10
[ 4 13  9  6  8  7  4 14 12  5]
Epoch 1/100
[ 4 13  9  6  8  7  4 14 12  5]
 1/10 [==>...........................] - ETA: 0s - loss: 0.6931 - l_reg_loss: 0.6931 - bb_reg_loss: 2.1623e-07[ 4 13  9  6  8  7  4 14 12  5]
 2/10 [=====>........................] - ETA: 21s - loss: 0.6931 - l_reg_loss: 0.6931 - bb_reg_loss: 3.4429e-07[ 4 13  9  6  8  7  4 14 12  5]
[ 4 13  9  6  8  7  4 14 12  5]

Epoch 00001: val_loss improved from inf to 0.88242, saving model to best_fRCNN_mask_02.keras
Epoch 2/100
[14 13  6  8 14 15 14  5 14  9]
 1/10 [==>...........................] - ETA: 0s - loss: 0.6915 - l_reg_loss: 0.6915 - bb_reg_loss: 4.0629e-07[14 13  6  8 14 15 14  5 14  9]
 2/10 [=====>........................] - ETA: 17s - loss: 0.6914 - l_reg_loss: 0.6914 - bb_reg_loss: 7.2112e-07[14 13  6  8 14 15 14  5 14  9]
Epoch 00002: val_loss improved from 0.88242 to 0.88113, saving model to best_fRCNN_mask_02.keras
[ 7  8 12  1  7  0 16 15 10 10]
Epoch 3/100
[ 7  8 12  1  7  0 16 


KeyboardInterrupt

