In [1]:
import cv2
import numpy as np
import random
import h5py as h5
from pathlib import Path
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tqdm import tqdm
import tensorflow as tf


# Training the F-RCNN

## Importing auxiliary functions

In [2]:
import sys
sys.path.append("..") # TODO: fix

In [3]:
from training.read_dataset import read_dataset
from training.create_anchors import create_anchors
from training.calculate_bbox_intesect_over_union import calculate_bbox_intesect_over_union
from training.evaluate_ious import evaluate_ious
from training.create_samples_for_training import create_samples_for_training
from training.parametrize_anchor_box_properties import parametrize_anchor_box_properties
from training.losses import loss_cls, loss_reg
from training.run import generate_validation_data
from training.run import input_generator

## Defining the path of the dataset

In [4]:
dataset_folder = Path("dataset_FRCNN", "Output")

## Reading the dataset

In [5]:
IMG_SIZE = (256, 256)

In [6]:
images_train, bbox_datasets_train = read_dataset(
    IMG_SIZE, dataset_folder, "Training"
)

images_val, bbox_datasets_val = read_dataset(
    IMG_SIZE, dataset_folder, "Validation"
)

## Shuffling the data

In [7]:
RANDOM_SEED = 13

In [8]:
# Shuffling dataset
N_dataset = images_train.shape[0]
array_for_shuffling = np.arange(N_dataset, dtype=int)
random.Random(RANDOM_SEED).shuffle(array_for_shuffling)

# Shuffling images
images_train = images_train[array_for_shuffling]

# Shuffling bboxes
bbox_datasets_new_train = []
for new_index in array_for_shuffling:
    bbox_datasets_new_train.append(bbox_datasets_train[new_index])
bbox_datasets_train = bbox_datasets_new_train

In [9]:
# Shuffling dataset
N_val_dataset = images_val.shape[0]
array_for_shuffling = np.arange(N_val_dataset, dtype=int)
random.Random(RANDOM_SEED).shuffle(array_for_shuffling)

# Shuffling images
images_val = images_val[array_for_shuffling]

# Shuffling bboxes
bbox_datasetes_new_val = []
for new_index in array_for_shuffling:
    bbox_datasetes_new_val.append(bbox_datasets_val[new_index])
bbox_datasets_val = bbox_datasetes_new_val

In [10]:
# Defining which image is used during training
MODE = "mask"
if MODE == "mask":
    images_train = images_train[:,:,:,0]
elif MODE == "raw":
    images_train = images_train[:,:,:,1]

In [11]:
# Defining which image is used during training
MODE = "mask"
if MODE == "mask":
    images_val = images_val[:,:,:,0]
elif MODE == "raw":
    images_val = images_val[:,:,:,1]

## Defining some parameters

In [12]:
N_SUB = 8
ANCHOR_REAL_SIZE = [16, 24, 32, 48, 64]
POS_IOU_THRESHOLD = 0.50
NEG_IOU_THRESHOLD = 0.1
N_FILTERS = 16
KERNEL_SIZE = 5

ANCHOR_RATIOS = [0.5, 0.8, 1.0, 1.1]

SHOW_N_POS = False
POS_RATIO = 0.5
N_SAMPLES = 30

ADAM_LEARNING_RATE = 3.0e-4

# Subscaling/Anchor values (2^n), ex: 1, 2, 4, 8, 16, 32
N_SUB = 2

# Defining anchor sizes
ANCHOR_SIZES = np.array(ANCHOR_REAL_SIZE) // N_SUB

# Defining number of anchors sized and rations
N_ANCHORS =  len(ANCHOR_SIZES)
N_RATIOS  =  len(ANCHOR_RATIOS)

## Defining the F-RCNN architecture

In [13]:
input_image = Input(shape=(IMG_SIZE[0],IMG_SIZE[1],1))

conv_3_3_1 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-1"
    )(input_image)
max_pool_1 = MaxPooling2D((2,2),
                          name="max_pool_1")(conv_3_3_1)

conv_3_3_2 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-2"
)(max_pool_1)


max_pool_2 = MaxPooling2D((2,2),
                          name="max_pool_2")(conv_3_3_2)

conv_3_3_3 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-3"
)(max_pool_2)

max_pool_3 = MaxPooling2D((2,2),
                          name="max_pool_3")(conv_3_3_3)

conv_3_3_4 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-4"
)(max_pool_3)

max_pool_4 = MaxPooling2D((2,2),
                          name="max_pool_4")(conv_3_3_4)

conv_3_3_5 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-5"
)(max_pool_4)

max_pool_5 = MaxPooling2D((2,2),
                          name="max_pool_5")(conv_3_3_5)

conv_3_3_6 = Conv2D(
    filters=N_FILTERS,
    kernel_size=KERNEL_SIZE,
    padding='same',
    name="3x3-6"
)(max_pool_5)

max_pool_6 = MaxPooling2D((2,2),
                          name="max_pool_6")(conv_3_3_6)

In [14]:
N_SUB = 8

if N_SUB == 1:
    last_layer = conv_3_3_1
elif N_SUB == 2:
    last_layer = max_pool_1
elif N_SUB == 4:
    last_layer = max_pool_2
elif N_SUB == 8:
    last_layer = max_pool_3
elif N_SUB == 16:
    last_layer = max_pool_4
elif N_SUB == 32:
    last_layer = max_pool_5

In [15]:
output_scores = Conv2D(
    filters=N_ANCHORS * N_RATIOS,
    kernel_size=(1, 1),
    activation="sigmoid",
    kernel_initializer="uniform",
    name="l_reg"
)(last_layer)

output_regressor = Conv2D(
    filters=N_ANCHORS * N_RATIOS * 4,
    kernel_size=(1, 1),
    activation="linear",
    kernel_initializer="uniform",
    name="bb_reg"
)(last_layer)

opt = Adam(learning_rate=ADAM_LEARNING_RATE)
model = Model(inputs=[input_image], outputs=[output_scores, output_regressor])
model.compile(optimizer=opt, loss={'l_reg':loss_cls, 'bb_reg':loss_reg})

Below we can get a summary of the model. Alternatively (and recommended), you can check the F-RCNN architecture with Keras' `plot_model` function (see [here](https://keras.io/api/utils/model_plotting_utils/)).

In [16]:
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 1) 0                                            
__________________________________________________________________________________________________
3x3-1 (Conv2D)                  (None, 256, 256, 16) 416         input_1[0][0]                    
__________________________________________________________________________________________________
max_pool_1 (MaxPooling2D)       (None, 128, 128, 16) 0           3x3-1[0][0]                      
__________________________________________________________________________________________________
3x3-2 (Conv2D)                  (None, 128, 128, 16) 6416        max_pool_1[0][0]                 
_______________________________________________________________________________________

## Training the model

Now, we need to setup a few callbacks to be executed during training.

The first one is the `early_stopping` callback, which defines a `PATIENCE` value to limit the number of running epochs (`EPOCHS`).

The second one, `best_model`, is a callback to store the F-RCNN with the lowest validation loss.

In [17]:
N_EPOCHS = 20
N_PATIENCE = 5

In [18]:
N_VALIDATION_DATA = 2
N_DATA_EPOCHS = 10
DEBUG = False

In [19]:
# Defining the best fRCNN model name
best_model_name = "frcnn_synthetic_contours.keras" # % (MODE, N_SUB)

early_stopping = EarlyStopping(monitor='val_loss',
                               mode='min',
                               verbose=1,
                               patience=N_PATIENCE)

best_model = ModelCheckpoint(best_model_name,
                              verbose=1,
                              save_best_only=True,
                              monitor='val_loss',
                              mode='auto')

On this function below, it will generate the validation data necessary to train the model.

In [20]:
validation_data = generate_validation_data(images_val,
                                           bbox_datasets_val,
                                           IMG_SIZE,
                                           N_SUB,
                                           N_ANCHORS,
                                           ANCHOR_SIZES,
                                           ANCHOR_RATIOS,
                                           N_RATIOS,
                                           POS_IOU_THRESHOLD,
                                           NEG_IOU_THRESHOLD,
                                           DEBUG,
                                           POS_RATIO,
                                           N_SAMPLES,
                                           SHOW_N_POS)

Number of validation images: 2


Generating validation data: 2it [00:01,  1.38it/s]


Now, it's time to train the model with our data.

In [21]:
model.fit(input_generator(images_train,
                          bbox_datasets_train,
                          IMG_SIZE,
                          N_SUB,
                          ANCHOR_RATIOS,
                          ANCHOR_SIZES,
                          N_DATA_EPOCHS,
                          N_ANCHORS,
                          N_RATIOS,
                          POS_IOU_THRESHOLD,
                          NEG_IOU_THRESHOLD,
                          DEBUG, 
                          POS_RATIO,
                          N_SAMPLES,
                          SHOW_N_POS),
          steps_per_epoch=10,
          epochs=N_EPOCHS,
          callbacks=[best_model, early_stopping],
          validation_data = validation_data)

[17 24 11 38 37 42 10 16  1  8]
Epoch 1/20
[17 24 11 38 37 42 10 16  1  8]
 1/10 [==>...........................] - ETA: 0s - loss: 0.6931 - l_reg_loss: 0.6931 - bb_reg_loss: 0.0000e+00[17 24 11 38 37 42 10 16  1  8]
 2/10 [=====>........................] - ETA: 1s - loss: 0.6931 - l_reg_loss: 0.6931 - bb_reg_loss: 2.8542e-08[17 24 11 38 37 42 10 16  1  8]
Epoch 00001: val_loss improved from inf to 386.31979, saving model to frcnn_synthetic_contours.keras
Epoch 2/20
[22 13  9  7 39 10 41 20 26 39]
 1/10 [==>...........................] - ETA: 0s - loss: 0.6907 - l_reg_loss: 0.6907 - bb_reg_loss: 6.5335e-07[22 13  9  7 39 10 41 20 26 39]
 2/10 [=====>........................] - ETA: 1s - loss: 0.6906 - l_reg_loss: 0.6906 - bb_reg_loss: 7.3934e-07[22 13  9  7 39 10 41 20 26 39]
Epoch 00002: val_loss improved from 386.31979 to 386.31152, saving model to frcnn_synthetic_contours.keras
Epoch 3/20
[ 1 28 11 31  6 14  2 38 33  1]
 1/10 [==>...........................] - ETA: 0s - loss: 0.6864

<tensorflow.python.keras.callbacks.History at 0x1b796471a90>

At this point, the model is trained. In order to use the model as a segmentation tool, please check the `predicting_example`.
