In [2]:
import cv2
import numpy as np
import random
import h5py as h5
from pathlib import Path
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

# Training the F-RCNN

## Importing auxiliary functions

In [23]:
import sys

# Make the parent directory importable so the `training` package used by the
# next cell resolves.  The membership check keeps the cell idempotent:
# re-running it no longer stacks duplicate ".." entries on sys.path.
# TODO: replace this path hack with a proper package install.
if ".." not in sys.path:
    sys.path.append("..")

In [25]:
from training.read_dataset import read_dataset
from training.create_anchors import create_anchors
from training.calculate_bbox_intesect_over_union import calculate_bbox_intesect_over_union
from training.evaluate_ious import evaluate_ious
from training.create_samples_for_training import create_samples_for_training
from training.parametrize_anchor_box_properties import parametrize_anchor_box_properties
from training.losses import loss_cls, loss_reg
from training.run import generate_validation_data
from training.run import input_generator

## Defining parameters

In [26]:
# --- Training schedule -------------------------------------------------------
N_RATIO_LOSSES = 10.0      # not referenced in the visible cells; presumably a loss weighting -- TODO confirm
N_VALIDATION_DATA = 10     # the first N_VALIDATION_DATA shuffled samples form the validation split
N_TEST_DATA = 5            # not referenced in the visible cells
N_DATA_EPOCHS = 10         # forwarded to input_generator
N_EPOCHS = 100             # passed to model.fit(epochs=...)
# NOTE(review): the patience is larger than N_EPOCHS, so early stopping can
# never trigger before training ends -- confirm this is intended.
N_PATIENCE = 200
RANDOM_SEED = 13

# --- Input -------------------------------------------------------------------
IMG_SIZE = (500, 500)
MODE = "mask"              # "mask" or "raw": selects which image channel is trained on

DEBUG = False

# --- Anchors / network -------------------------------------------------------
ANCHOR_REAL_SIZE = [16, 24, 32, 48, 64]
POS_IOU_THRESHOLD = 0.50
NEG_IOU_THRESHOLD = 0.1
N_FILTERS = 16
KERNEL_SIZE = 5

ANCHOR_RATIOS = [0.5, 0.8, 1.0, 1.1]

# --- Sampling ----------------------------------------------------------------
SHOW_N_POS = False
POS_RATIO = 0.5
N_SAMPLES = 30

ADAM_LEARNING_RATE = 3.0e-4

# Subscaling/Anchor values (2^n), ex: 1, 2, 4, 8, 16, 32.
# This is the single definition of N_SUB; an earlier `N_SUB = 8` in this cell
# was dead code because it was overwritten before any use.
N_SUB = 2

# Anchor sizes expressed in units of the subsampled feature map
ANCHOR_SIZES = np.array(ANCHOR_REAL_SIZE) // N_SUB

# Number of anchor sizes and anchor aspect ratios
N_ANCHORS = len(ANCHOR_SIZES)
N_RATIOS = len(ANCHOR_RATIOS)

## Reading the dataset

In [27]:
# Load the images and their bounding boxes from the dataset folder.
# IMG_SIZE comes from the parameters cell; the previous lowercase `img_size`
# was never defined in this notebook and failed on a fresh kernel.
dataset_folder = Path("dataset_test")
imgs, bbox_datasets = read_dataset(IMG_SIZE, dataset_folder)

## Shuffling the data

In [28]:
# Build one permutation and apply it to both the images and the bounding
# boxes so that every image stays paired with its own boxes.
N_dataset = imgs.shape[0]
array_for_shuffling = np.arange(N_dataset, dtype=int)
# Seed from the parameters cell rather than a hard-coded literal, so changing
# RANDOM_SEED actually changes the split.
random.Random(RANDOM_SEED).shuffle(array_for_shuffling)

# Shuffling images
imgs = imgs[array_for_shuffling]

# Shuffling bboxes (indexed one by one, as bbox_datasets is not fancy-indexed here)
bbox_datasets = [bbox_datasets[new_index] for new_index in array_for_shuffling]

In [29]:
# Pick the channel used for training: index 0 of the last axis for "mask",
# index 1 for "raw".  Any other MODE is rejected explicitly; previously it
# fell through silently and left `imgs` four-dimensional.
# NOTE(review): this overwrites `imgs`, so the cell is not re-runnable
# without re-executing the dataset cells first.
if MODE == "mask":
    imgs = imgs[:,:,:,0]
elif MODE == "raw":
    imgs = imgs[:,:,:,1]
else:
    raise ValueError("Unsupported MODE %r: expected 'mask' or 'raw'" % (MODE,))

## Defining the F-RCNN architecture

In [30]:
def _conv_pool_stage(inputs, stage):
    """Build one backbone stage: a 'same'-padded convolution followed by 2x2 max pooling.

    Parameters
    ----------
    inputs : tensor feeding the stage.
    stage : int, 1-based stage number; it is embedded in the layer names.

    Returns
    -------
    (conv, pool) : the convolution output and the pooled output of the stage.
    """
    # The "3x3-N" layer names are kept unchanged for compatibility with
    # previously saved models, although the kernel is KERNEL_SIZE x KERNEL_SIZE.
    conv = Conv2D(
        filters=N_FILTERS,
        kernel_size=KERNEL_SIZE,
        padding='same',
        name="3x3-%d" % stage
    )(inputs)
    pool = MaxPooling2D((2,2),
                        name="max_pool_%d" % stage)(conv)
    return conv, pool


# Single-channel input.  IMG_SIZE comes from the parameters cell; the previous
# lowercase `img_size` was never defined in this notebook.
input_image = Input(shape=(IMG_SIZE[0],IMG_SIZE[1],1))

# Six identical stages chained together; each pooling halves both spatial
# dimensions.  Every intermediate tensor keeps its own name because the
# following cell picks one of them as the feature map.
conv_3_3_1, max_pool_1 = _conv_pool_stage(input_image, 1)
conv_3_3_2, max_pool_2 = _conv_pool_stage(max_pool_1, 2)
conv_3_3_3, max_pool_3 = _conv_pool_stage(max_pool_2, 3)
conv_3_3_4, max_pool_4 = _conv_pool_stage(max_pool_3, 4)
conv_3_3_5, max_pool_5 = _conv_pool_stage(max_pool_4, 5)
conv_3_3_6, max_pool_6 = _conv_pool_stage(max_pool_5, 6)

In [31]:
# Map each supported subsampling factor to the backbone tensor whose spatial
# resolution is the input divided by that factor.
_LAST_LAYER_BY_N_SUB = {
    1: conv_3_3_1,
    2: max_pool_1,
    4: max_pool_2,
    8: max_pool_3,
    16: max_pool_4,
    32: max_pool_5,
}

# Fail loudly on an unsupported factor; previously `last_layer` was left
# undefined (or stale from an earlier run) in that case.
if N_SUB not in _LAST_LAYER_BY_N_SUB:
    raise ValueError(
        "Unsupported N_SUB %r: expected one of %s"
        % (N_SUB, sorted(_LAST_LAYER_BY_N_SUB))
    )
last_layer = _LAST_LAYER_BY_N_SUB[N_SUB]

In [32]:
# One output channel per (anchor size, anchor ratio) combination.
n_boxes_per_cell = N_ANCHORS * N_RATIOS

# Score head: a 1x1 convolution with a sigmoid, one value per anchor box.
# Its layer name "l_reg" is the key used in the loss dictionary below.
output_scores = Conv2D(
    n_boxes_per_cell,
    (1, 1),
    name="l_reg",
    activation="sigmoid",
    kernel_initializer="uniform",
)(last_layer)

# Regression head: a linear 1x1 convolution with four values per anchor box.
output_regressor = Conv2D(
    n_boxes_per_cell * 4,
    (1, 1),
    name="bb_reg",
    activation="linear",
    kernel_initializer="uniform",
)(last_layer)

# Assemble the two-headed model and attach one loss per named output.
model = Model(inputs=[input_image], outputs=[output_scores, output_regressor])
opt = Adam(learning_rate=ADAM_LEARNING_RATE)
losses_by_output = {'l_reg': loss_cls, 'bb_reg': loss_reg}
model.compile(optimizer=opt, loss=losses_by_output)

# Save a diagram of the architecture and print the layer table.
plot_model(model, show_shapes=True, to_file="model_true.png")
model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 500, 500, 1) 0                                            
__________________________________________________________________________________________________
3x3-1 (Conv2D)                  (None, 500, 500, 16) 416         input_2[0][0]                    
__________________________________________________________________________________________________
max_pool_1 (MaxPooling2D)       (None, 250, 250, 16) 0           3x3-1[0][0]                      
__________________________________________________________________________________________________
l_reg (Conv2D)                  (None, 250, 250, 20) 340         max_pool_1[0][0]                 
_______________________________________________________________________________________

## Training the model

In [33]:
# File that receives the best model; the name encodes MODE and N_SUB.
best_model_name = "best_fRCNN_%s_%02d.keras" % (MODE, N_SUB)

# Save the model whenever the validation loss improves.
checkpoint = ModelCheckpoint(
    best_model_name,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

# Stop training once the validation loss has not improved for N_PATIENCE epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=N_PATIENCE,
    verbose=1,
)

# The first N_VALIDATION_DATA shuffled samples are used for validation.
validation_data = generate_validation_data(
    imgs[:N_VALIDATION_DATA],
    bbox_datasets[:N_VALIDATION_DATA],
    IMG_SIZE, N_SUB, N_ANCHORS, ANCHOR_SIZES, ANCHOR_RATIOS, N_RATIOS,
    POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, DEBUG, POS_RATIO, N_SAMPLES, SHOW_N_POS,
)

# Every remaining sample feeds the training generator.
# NOTE(review): N_TEST_DATA is not used to hold anything out here -- confirm
# the test split is taken elsewhere.
training_batches = input_generator(
    imgs[N_VALIDATION_DATA:],
    bbox_datasets[N_VALIDATION_DATA:],
    IMG_SIZE, N_SUB, ANCHOR_RATIOS, ANCHOR_SIZES, N_DATA_EPOCHS, N_ANCHORS, N_RATIOS,
    POS_IOU_THRESHOLD, NEG_IOU_THRESHOLD, DEBUG, POS_RATIO, N_SAMPLES, SHOW_N_POS,
)

model.fit(
    training_batches,
    steps_per_epoch=10,
    epochs=N_EPOCHS,
    callbacks=[checkpoint, early_stopping],
    validation_data=validation_data,
)

1
2
3
4
5
6
7
8
9
10
[ 9  9 11  1 10 16 14 13 12  0]
Epoch 1/100
[ 9  9 11  1 10 16 14 13 12  0]
 1/10 [==>...........................] - ETA: 0s - loss: 0.6931 - l_reg_loss: 0.6931 - bb_reg_loss: 2.6746e-07[ 9  9 11  1 10 16 14 13 12  0]
 2/10 [=====>........................] - ETA: 16s - loss: 0.6931 - l_reg_loss: 0.6931 - bb_reg_loss: 4.2798e-07[ 9  9 11  1 10 16 14 13 12  0]

Epoch 00001: val_loss improved from inf to 0.87226, saving model to best_fRCNN_mask_02.keras
Epoch 2/100
[16 15  2 15  3 16  0  6  0  7]
 1/10 [==>...........................] - ETA: 0s - loss: 0.6915 - l_reg_loss: 0.6915 - bb_reg_loss: 2.9870e-07[16 15  2 15  3 16  0  6  0  7]


KeyboardInterrupt: 