# Faster R-CNN

## Environment

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import time
import losses
import config
import data_generator
import roi_utils
from rpn import rpn
import resnet
import pascal_voc_parser
import os
import json
from RoiPooling import RoiPooling
# tf.enable_eager_execution()

# Notebook-wide settings.
# This rebinds the name `config` from the imported module to a Config instance.
config = config.Config()
num_epochs = 2000

# All paths hang off the directory the notebook is run from.
_working_dir = os.path.abspath('.')
base_nn_weights_path = '/'.join((_working_dir, resnet.WEIGHT_PATH))
data_path = '/'.join((_working_dir, 'data'))
temp_img_data_path = '/'.join((_working_dir, 'temp_img_data.json'))
print(data_path)

  from ._conv import register_converters as _register_converters


F:\myMachineLearning\faster_rcnn\src/data


## Dataset

In [2]:
# PASCAL VOC 2007: create the data directory if needed and download/extract
# the trainval archive unless the tar file is already present.
if not os.path.exists(data_path):
    os.mkdir(data_path)

_voc2007_tar = data_path + '/VOC2007.tar'
if not os.path.exists(_voc2007_tar):
    annotation_zip = keras.utils.get_file(
        _voc2007_tar,
        origin='http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar',
        cache_subdir=data_path,
        extract=True,
    )

In [3]:
# PASCAL VOC 2012 (disabled alternative to the 2007 download):
# annotation_zip = keras.utils.get_file(data_path + '/VOC2012.tar',
#                                         extract=True,
#                                         cache_subdir=data_path,
#                                         origin='http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar')

# From here on data_path refers to the VOCdevkit directory inside the data folder.
data_path = data_path + '/VOCdevkit/'

In [4]:
# Parse the annotations into per-image records, per-class counts and a
# class-name -> index mapping.
all_imgs, classes_count, class_mapping = pascal_voc_parser.get_data(data_path)

# Make sure a background class exists; it takes the next free index.
if 'bg' not in classes_count:
    _bg_index = len(class_mapping)
    classes_count['bg'] = 0
    class_mapping['bg'] = _bg_index

config.class_mapping = class_mapping
# Reverse lookup: class index -> class name.
class_mapping_inv = dict(zip(class_mapping.values(), class_mapping.keys()))
print(len(all_imgs), classes_count, class_mapping)

Parsing annotation files
5011 {'chair': 1432, 'car': 1644, 'horse': 406, 'person': 5447, 'bicycle': 418, 'cat': 389, 'dog': 538, 'train': 328, 'aeroplane': 331, 'diningtable': 310, 'tvmonitor': 367, 'bird': 599, 'bottle': 634, 'motorbike': 390, 'pottedplant': 625, 'boat': 398, 'sofa': 425, 'sheep': 353, 'cow': 356, 'bus': 272, 'bg': 0} {'chair': 0, 'car': 1, 'horse': 2, 'person': 3, 'bicycle': 4, 'cat': 5, 'dog': 6, 'train': 7, 'aeroplane': 8, 'diningtable': 9, 'tvmonitor': 10, 'bird': 11, 'bottle': 12, 'motorbike': 13, 'pottedplant': 14, 'boat': 15, 'sofa': 16, 'sheep': 17, 'cow': 18, 'bus': 19, 'bg': 20}


In [5]:
# Split the parsed records by their 'imageset' tag: 'trainval' records are
# used for training, 'test' records for testing; anything else is ignored.
train_imgs = []
test_imgs = []
for _record in all_imgs:
    _imageset = _record['imageset']
    if _imageset == 'trainval':
        train_imgs.append(_record)
    elif _imageset == 'test':
        test_imgs.append(_record)

print(len(train_imgs), len(test_imgs))

2501 2510


In [6]:
import data_generator
# Show one parsed record as a sanity check.
print(train_imgs[0])

# Train and test input pipelines.  Both wrap data_generator.get_anchor_gt in a
# tf.data generator dataset whose elements are three float32 tensors, and both
# expose the `get_next()` tensor of a one-shot iterator.
_ANCHOR_GT_TYPES = (tf.float32, tf.float32, tf.float32)


def _train_anchor_gt():
    """Return a fresh anchor/ground-truth generator over the training images."""
    return data_generator.get_anchor_gt(
        train_imgs, classes_count, config, resnet.get_img_output_length, temp_img_data_path)


def _test_anchor_gt():
    """Return a fresh anchor/ground-truth generator over the test images."""
    return data_generator.get_anchor_gt(
        test_imgs, classes_count, config, resnet.get_img_output_length, temp_img_data_path)


train_dataset = (tf.data.Dataset
                 .from_generator(_train_anchor_gt, _ANCHOR_GT_TYPES)
                 .make_one_shot_iterator()
                 .get_next())

test_dataset = (tf.data.Dataset
                .from_generator(_test_anchor_gt, _ANCHOR_GT_TYPES)
                .make_one_shot_iterator()
                .get_next())

# Quick manual check of the pipeline (disabled):
# with tf.Session() as sess:
#     a, b, c = sess.run(train_dataset)
#     print(a, np.sum(b), np.sum(c))
#     a, b, c = sess.run(train_dataset)
#     print(a, np.sum(b), np.sum(c))

{'filepath': 'F:\\myMachineLearning\\faster_rcnn\\src/data/VOCdevkit/VOC2007\\JPEGImages\\000012.jpg', 'width': 500, 'height': 333, 'bboxes': [{'class': 'car', 'x1': 156, 'x2': 351, 'y1': 97, 'y2': 270, 'difficult': False}], 'imageset': 'trainval'}


## Model

In [7]:
# Build the three Keras models used for training:
#   * model_rpn        - image -> first two RPN outputs
#   * model_classifier - (image, ROIs) -> detector head outputs
#   * model_all        - both of the above, used only to save/load weights

# Inputs: an RGB image of arbitrary spatial size and a variable-length list
# of 4-value ROIs.
input_shape = (None, None, 3)
img_input = keras.layers.Input(shape=input_shape)
roi_input = keras.layers.Input(shape=(None, 4))

# Base network (built with trainable=False); its output feeds both heads.
shared_layers = resnet.nn_base(img_input, trainable=False)

# Region proposal network: one anchor per (scale, ratio) combination.
num_anchors = len(config.anchor_box_scales) * len(config.anchor_box_ratios)
rpn_layer = rpn(shared_layers, num_anchors)

# TEST
# base_input = keras.layers.Input(shape=(300, 300, 1024))
# base_input = keras.layers.Input(shape=(config.num_rois, 14, 14, 1024))

# roi pooling
# roi_pool = RoiPooling(14, config.num_rois)(shared_layers, roi_input)
# roi_pool = RoiPooling(14, config.num_rois)([shared_layers, roi_input])
# roi_pool = RoiPooling(14, config.num_rois)([base_input, roi_input])

# classifier
# classifier = resnet.classifier(roi_pool, input_shape=(config.num_rois, 14, 14, 1024),
#                                n_classes=len(classes_count), trainable=False)
# roi_pool = resnet.roi(base_input, roi_input, 14, config.num_rois)
# classifier = resnet.classifier(roi_pool, input_shape=(config.num_rois, 14, 14, 1024),
#                                n_classes=len(classes_count), trainable=False)
classifier = resnet.classifier(shared_layers, roi_input, config.num_rois,
                               input_shape=(config.num_rois, 14, 14, 1024),
                               n_classes=len(classes_count), trainable=False)

model_rpn = keras.Model(img_input, rpn_layer[:2])
# model_rpn.summary()
model_classifier = keras.Model([img_input, roi_input], classifier)
# model_classifier = keras.Model([base_input, roi_input], classifier)
model_classifier.summary()

# This model holds both the RPN and the classifier and is used to load/save
# weights for the two of them together.  The training loop calls
# model_all.save_weights(...), so it has to be defined here.
# NOTE(review): assumes `classifier` is a list of output tensors so that it
# can be concatenated with rpn_layer[:2] - confirm against resnet.classifier.
model_all = keras.Model([img_input, roi_input], rpn_layer[:2] + classifier)

# Load the pre-trained base-network weights; by_name=True matches weights to
# layers by layer name.
print('loading weights from {}'.format(base_nn_weights_path))
model_rpn.load_weights(base_nn_weights_path, by_name=True)
model_classifier.load_weights(base_nn_weights_path, by_name=True)

(?, ?, ?, 1024)
(?, ?, 4)
Tensor("roi_pooling/strided_slice:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_1:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_2:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_3:0", shape=(), dtype=float32)
Tensor("roi_pooling/strided_slice_5:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_6:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_7:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_8:0", shape=(), dtype=float32)
Tensor("roi_pooling/strided_slice_10:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_11:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_12:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_13:0", shape=(), dtype=float32)
Tensor("roi_pooling/strided_slice_15:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_16:0", shape=(), dtype=float32) Tensor("roi_pooling/strided_slice_17:0", shape=(), dtype=flo

ValueError: Output tensors to a Model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: Tensor("dense_class_21/Tensordot:0", shape=(1, 32, 21), dtype=float32)

## Train

In [None]:
# Compile the two trainable models, each with its own Adam optimizer.
optimizer = keras.optimizers.Adam(lr=1e-5)
optimizer_classifier = keras.optimizers.Adam(lr=1e-5)

# RPN: one loss per output (objectness classification, box regression).
model_rpn.compile(optimizer=optimizer,
                  loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])

# Detector head: classification loss on the first output, regression loss on
# the second.  The accuracy metric on the first output is required because the
# training loop reads a fourth value (loss_class[3]) from train_on_batch as
# the classifier accuracy; without a metric only three values are returned.
model_classifier.compile(optimizer=optimizer_classifier,
                         loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count) - 1)],
                         metrics={model_classifier.output_names[0]: 'accuracy'})
# model_all.compile(optimizer='sgd', loss='mae')

In [None]:
# Bookkeeping state for the training loop.
epoch_length = 1000          # training steps per "epoch"
iter_num = 0                 # step counter within the current epoch
# Per-step loss log, one row per step.  Columns as filled by the training
# loop: rpn_cls, rpn_regr, detector_cls, detector_regr, detector accuracy.
# NOTE: this rebinds the name `losses`, which until now referred to the
# imported `losses` module; the compile cell must run before this one.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []   # positive-ROI counts since the last verbose report
rpn_accuracy_for_epoch = []     # positive-ROI counts for the current epoch
start_time = time.time()
# np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
best_loss = np.inf
print('Start training.')
vis = True

In [None]:
# Training loop.
#
# Each step:
#   1. pulls one sample (image, RPN class target, RPN regression target) from
#      the training iterator,
#   2. trains the RPN on it and converts the RPN predictions into ROIs,
#   3. samples up to config.num_rois ROIs (about half positive, the rest
#      background) and trains the detector head on them,
#   4. logs the losses.
# After `epoch_length` steps the losses are averaged and reported, and the
# weights are saved when the summed loss improved.

# One session for the whole run, used only to pull samples from the input
# iterator.  Opening a new session on every step is expensive, and the
# iterator's position is presumably held by the session, so a fresh session
# would restart the generator each time.  It is deliberately NOT installed as
# the default session, so Keras keeps using its own session (the one holding
# the loaded weights).
# NOTE(review): assumes data_generator.get_anchor_gt yields indefinitely;
# a finite generator would end the run with an OutOfRangeError - confirm.
data_sess = tf.Session()
try:
    for epoch_num in range(num_epochs):
        progbar = keras.utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            # Periodic RPN health report (verbose mode only).
            if len(rpn_accuracy_rpn_monitor) == epoch_length and config.verbose:
                mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                rpn_accuracy_rpn_monitor = []
                print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length))
                if mean_overlapping_bboxes == 0:
                    print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

            # Next training sample.
            X, Y1, Y2 = data_sess.run(train_dataset)

            # Train the RPN, then predict with the updated weights.
            loss_rpn = model_rpn.train_on_batch(X, [Y1, Y2])
            P_rpn = model_rpn.predict_on_batch(X)

            # Turn RPN predictions into ROIs.
            R = roi_utils.rpn_to_roi(P_rpn[0], P_rpn[1], config,
                                     max_boxes=300, overlap_threshold=0.7)
            # Detector ground truth for those ROIs (Y1/Y2 are reused here for
            # the detector targets, replacing the RPN targets).
            X2, Y1, Y2, ious = roi_utils.cal_iou(R, temp_img_data_path, config, class_mapping)
            if X2 is None:
                # No usable ROI for this image: record zero positives and move on.
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue
            print(X2.shape, Y1.shape, Y2.shape)

            # The last column of Y1 flags the background class: 1 -> negative
            # (background) ROI, 0 -> positive ROI.
            neg_samples = np.where(Y1[0, :, -1] == 1)[0]
            pos_samples = np.where(Y1[0, :, -1] == 0)[0]
            rpn_accuracy_rpn_monitor.append(len(pos_samples))
            rpn_accuracy_for_epoch.append(len(pos_samples))

            if config.num_rois > 1:
                # At most half of the ROIs are positives ...
                if len(pos_samples) < config.num_rois // 2:
                    sel_pos_samples = pos_samples.tolist()
                else:
                    sel_pos_samples = np.random.choice(pos_samples, config.num_rois // 2, replace=False).tolist()
                # ... and the remainder are negatives, sampled with
                # replacement only when there are too few distinct ones.
                num_neg = config.num_rois - len(sel_pos_samples)
                try:
                    sel_neg_samples = np.random.choice(neg_samples, num_neg, replace=False).tolist()
                except ValueError:
                    sel_neg_samples = np.random.choice(neg_samples, num_neg, replace=True).tolist()
                sel_samples = sel_pos_samples + sel_neg_samples
            else:
                # In the extreme case where num_rois = 1, pick a random pos or
                # neg sample; fall back to the other pool if the chosen one is
                # empty.  The index is wrapped in a list so that the ROI axis
                # survives the fancy indexing below.
                sel_pos_samples = pos_samples.tolist()
                sel_neg_samples = neg_samples.tolist()
                if np.random.randint(0, 2):
                    candidates, fallback = neg_samples, pos_samples
                else:
                    candidates, fallback = pos_samples, neg_samples
                if len(candidates) == 0:
                    candidates = fallback
                sel_samples = [int(np.random.choice(candidates))]
            print(sel_samples)

            # Train the detector head on the selected ROIs.
            loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
                                                         [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])
            print(loss_class)

            # Log the losses (index 0 of each result is the summed loss).
            losses[iter_num, 0] = loss_rpn[1]
            losses[iter_num, 1] = loss_rpn[2]
            losses[iter_num, 2] = loss_class[1]
            losses[iter_num, 3] = loss_class[2]
            # The accuracy entry only exists when the classifier was compiled
            # with a metric; record NaN instead of crashing when it was not.
            losses[iter_num, 4] = loss_class[3] if len(loss_class) > 3 else np.nan

            iter_num += 1
            progbar.update(iter_num, [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                      ('detector_cls', np.mean(losses[:iter_num, 2])), ('detector_regr', np.mean(losses[:iter_num, 3]))])

            if iter_num == epoch_length:
                # End of epoch: average, report, checkpoint.
                loss_rpn_cls = np.mean(losses[:, 0])
                loss_rpn_regr = np.mean(losses[:, 1])
                loss_class_cls = np.mean(losses[:, 2])
                loss_class_regr = np.mean(losses[:, 3])
                class_acc = np.mean(losses[:, 4])

                mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                rpn_accuracy_for_epoch = []

                if config.verbose:
                    print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                    print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                    print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                    print('Loss RPN regression: {}'.format(loss_rpn_regr))
                    print('Loss Detector classifier: {}'.format(loss_class_cls))
                    print('Loss Detector regression: {}'.format(loss_class_regr))
                    print('Elapsed time: {}'.format(time.time() - start_time))

                curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                # Reset the step counter for the next epoch; without this the
                # next epoch would index past the end of `losses`.
                iter_num = 0
                start_time = time.time()
                if curr_loss < best_loss:
                    if config.verbose:
                        print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                    best_loss = curr_loss
                    # model_all is expected to be the combined RPN + classifier
                    # model built alongside model_rpn / model_classifier.
                    model_all.save_weights(config.model_path)
                break
finally:
    data_sess.close()

print('Training complete!')