Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MaskRCNN model as a test for the wrapper approach with TF model garden code #754

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions keras_cv/models/__init__.py
Expand Up @@ -29,6 +29,7 @@
from keras_cv.models.mobilenet_v3 import MobileNetV3Large
from keras_cv.models.mobilenet_v3 import MobileNetV3Small
from keras_cv.models.object_detection.retina_net.retina_net import RetinaNet
from keras_cv.models.object_detection.mask_rcnn.mask_rcnn import mask_rcnn
from keras_cv.models.resnet_v1 import ResNet50
from keras_cv.models.resnet_v1 import ResNet101
from keras_cv.models.resnet_v1 import ResNet152
Expand Down
Empty file.
307 changes: 307 additions & 0 deletions keras_cv/models/object_detection/mask_rcnn/mask_rcnn.py
@@ -0,0 +1,307 @@
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Mask R-CNN model based on the Tensorflow model garden implementation.

As an experiment, Keras CV will try to wrap a thin layer on top of model garden
implementation, so that we could quickly provide models to end user without
re-implement/re-train the model end to end.

This class might serve as a temporary solution, and might change its interface
and details if needed in the future.
"""

import tensorflow as tf

try:
import tensorflow_models as tfm
except ImportError:
# Didn't have tensorflow model garden package installed, and the model can't
# be instantiated.
tfm = None


def _assert_tensorflow_model_installed():
if tfm is None:
raise ImportError(
'Tensor model package is not installed in the '
'environment. Please use `pip install tf-models-official` '
'or `pip install tf-models-nightly` to install the required '
'dependency package')


def mask_rcnn(
classes,
input_size,
backbone,
backbone_weights,
) -> tf.keras.Model:
"""Create a MaskRCNN model.

Args:
classes: int, the number of classes for the detection model. Note that
the classes doesn't contain the background class, and the classes
from the data should be represented by integers with range
[0, classes).
input_size: list or tuple of ints, the shape of the input for the model.
Note that this doesn't include the batch dimension. A sample use case
is like [512, 512, 3].
backbone: a backbone network for the model. Can be a `tf.keras.Model`
instance. The supported pre-defined backbone models are
1: 'resnet', which is a Resnet50 model.
2: 'spinenet', which is SpineNet49 model.
3: 'mobilenet', which is a MobileNet v2 model.
backbone_weights: pre-trained weights for the backbone model. The weights
can be a list of tensors, which will be set to backbone model
via `model.set_weights()`. A string file path is also supported, to
read the weights from a folder of tensorflow checkpoints. Note that
the weights and backbone network should be compatible between each
other.
"""
_assert_tensorflow_model_installed()

# Common model configs that are shared by all parts of the network
# TODO(scottzhu): Those value should be tunable, based on different
# backbone/decoder/RPN/
# This setting is for ResNet 50
min_level = 2
max_level = 6
anchor_aspect_ratios = [0.5, 1.0, 2.0]
anchor_num_scales = 1
anchor_size = 8
num_anchors_per_location = len(anchor_aspect_ratios) * anchor_num_scales

# Common normalization/regularization configs that are shared by all parts of the
# network.
activation = 'relu'
use_sync_bn = True
norm_momentum = 0.997
norm_epsilon = 0.0001
kernel_regularizer = tf.keras.regularizers.l2(0.00002)

# ================== Backbone =====================
if isinstance(backbone, str):
supported_premade_backbone = ['resnet', 'spinenet', 'mobilenet']
if backbone not in supported_premade_backbone:
raise ValueError(
f'Supported premade backbones are: '
'{supported_premade_backbone}, received "{backbone}"')
input_spec = tf.keras.layers.InputSpec(shape=[None] + list(input_size))
if backbone == 'resnet':
backbone = tfm.vision.backbones.ResNet(
model_id=50,
input_specs=input_spec,
se_ratio=0.0, # From the default Resnet config
activation=activation,
use_sync_bn=use_sync_bn,
norm_momentum=norm_momentum,
norm_epsilon=norm_epsilon,
kernel_regularizer=kernel_regularizer
)
else:
# TODO(scottzhu): Add spinenet and mobilenet later
pass
else:
# Make sure the backbone is a Keras model instance
# TODO(scottzhu): Might need to do more assertion about the model
if not isinstance(backbone, tf.keras.Model):
raise ValueError('Backbone need to be a `tf.keras.Model`, '
f'received {backbone}')

# load the backbone weights if provided.
# TODO(scottzhu):

# ================== Decoder/FPN =====================
# TODO(scottzhu): Might want to expose the decoder to the method signature.
decoder_num_filters = 256
decoder = tfm.vision.decoders.FPN(
input_specs=backbone.output_specs, # Might need to check for custom backbones
min_level=min_level,
max_level=max_level,
num_filters=decoder_num_filters,
activation=activation,
use_sync_bn=use_sync_bn,
norm_momentum=norm_momentum,
norm_epsilon=norm_epsilon,
kernel_regularizer=kernel_regularizer
)

# ================== RPN =====================
# TODO(scottzhu): Might want to expose the RPN to the method signature.
rpn_num_convs = 1
rpn_num_filters = 256
rpn_head = tfm.vision.heads.RPNHead(
min_level=min_level,
max_level=max_level,
num_anchors_per_location=num_anchors_per_location,
num_convs=rpn_num_convs,
num_filters=rpn_num_filters,
activation=activation,
use_sync_bn=use_sync_bn,
norm_momentum=norm_momentum,
norm_epsilon=norm_epsilon,
kernel_regularizer=kernel_regularizer
)

# ================== Detection head =====================
# TODO(scottzhu): Might want to expose the detection head to the method signature.
detection_num_convs = 4
detection_num_filters = 256
detection_num_fcs = 1
detection_fc_dims = 1024
detection_class_agnostic_bbox_pred = False
detection_head = tfm.vision.heads.DetectionHead(
num_classes=classes,
num_convs=detection_num_convs,
num_filters=detection_num_filters,
num_fcs=detection_num_fcs,
fc_dims=detection_fc_dims,
class_agnostic_bbox_pred=detection_class_agnostic_bbox_pred,
activation=activation,
use_sync_bn=use_sync_bn,
norm_momentum=norm_momentum,
norm_epsilon=norm_epsilon,
kernel_regularizer=kernel_regularizer,
name='detection_head')

# Create feature output tensor from backbone and RPN
backbone_features = backbone(tf.keras.Input(shape=input_size))
decoder_features = decoder(backbone_features)
rpn_head(decoder_features)

# ================== ROI generator =====================
roi_generator_pre_nms_top_k = 2000
roi_generator_pre_nms_score_threshold = 0.0
roi_generator_pre_nms_min_size_threshold = 0.0
roi_generator_nms_iou_threshold = 0.7
roi_generator_num_proposals = 1000
roi_generator_test_pre_nms_top_k = 1000
roi_generator_test_pre_nms_score_threshold = 0.0
roi_generator_test_pre_nms_min_size_threshold = 0.0
roi_generator_test_nms_iou_threshold = 0.7
roi_generator_test_num_proposals = 1000
roi_generator_use_batched_nms = False
roi_generator = tfm.vision.layers.roi_generator.MultilevelROIGenerator(
pre_nms_top_k=roi_generator_pre_nms_top_k,
pre_nms_score_threshold=roi_generator_pre_nms_score_threshold,
pre_nms_min_size_threshold=roi_generator_pre_nms_min_size_threshold,
nms_iou_threshold=roi_generator_nms_iou_threshold,
num_proposals=roi_generator_num_proposals,
test_pre_nms_top_k=roi_generator_test_pre_nms_top_k,
test_pre_nms_score_threshold=roi_generator_test_pre_nms_score_threshold,
test_pre_nms_min_size_threshold=roi_generator_test_pre_nms_min_size_threshold,
test_nms_iou_threshold=roi_generator_test_nms_iou_threshold,
test_num_proposals=roi_generator_test_num_proposals,
use_batched_nms=roi_generator_use_batched_nms
)

# ================== ROI =====================
roi_sampler_mix_gt_boxes = True
roi_sampler_num_sampled_rois = 512
roi_sampler_foreground_fraction = 0.25
roi_sampler_foreground_iou_threshold = 0.5
roi_sampler_background_iou_high_threshold = 0.5
roi_sampler_background_iou_low_threshold = 0.0
roi_sampler = tfm.vision.layers.ROISampler(
mix_gt_boxes=roi_sampler_mix_gt_boxes,
num_sampled_rois=roi_sampler_num_sampled_rois,
foreground_fraction=roi_sampler_foreground_fraction,
foreground_iou_threshold=roi_sampler_foreground_iou_threshold,
background_iou_high_threshold=roi_sampler_background_iou_high_threshold,
background_iou_low_threshold=roi_sampler_background_iou_low_threshold
)

roi_aligner_crop_size = 7
roi_aligner_sample_offset = 0.5
roi_aligner = tfm.vision.layers.MultilevelROIAligner(
crop_size=roi_aligner_crop_size,
sample_offset=roi_aligner_sample_offset,
)

# ================= Detection Generator ======================
detection_generator_apply_nms = True
detection_generator_pre_nms_top_k = 5000
detection_generator_pre_nms_score_threshold = 0.05
detection_generator_nms_iou_threshold = 0.5
detection_generator_max_num_detections = 100
detection_generator_nms_version = 'v2'
detection_generator_use_cpu_nms = False
detection_generator_soft_nms_sigma = None
detection_generator = tfm.vision.layers.DetectionGenerator(
apply_nms=detection_generator_apply_nms,
pre_nms_top_k=detection_generator_pre_nms_top_k,
pre_nms_score_threshold=detection_generator_pre_nms_score_threshold,
nms_iou_threshold=detection_generator_nms_iou_threshold,
max_num_detections=detection_generator_max_num_detections,
nms_version=detection_generator_nms_version,
use_cpu_nms=detection_generator_use_cpu_nms,
soft_nms_sigma=detection_generator_soft_nms_sigma,
)

# =============== Mask head and sampler =======================
mask_head_upsample_factor = 2
mask_head_num_convs = 4
mask_head_num_filters = 256
mask_head_use_separable_conv = False
mask_head_class_agnostic = False
mask_sampler_num_mask = 128
mask_roi_aligner_crop_size = 14
mask_roi_aligner_sample_offset = 0.5

mask_head = tfm.vision.heads.MaskHead(
num_classes=classes,
upsample_factor=mask_head_upsample_factor,
num_convs=mask_head_num_convs,
num_filters=mask_head_num_filters,
use_separable_conv=mask_head_use_separable_conv,
activation=activation,
use_sync_bn=use_sync_bn,
norm_momentum=norm_momentum,
norm_epsilon=norm_epsilon,
kernel_regularizer=kernel_regularizer,
class_agnostic=mask_head_class_agnostic,
)
mask_sampler = tfm.vision.layers.MaskSampler(
mask_target_size=mask_roi_aligner_crop_size * mask_head_upsample_factor,
num_sampled_masks=mask_sampler_num_mask,
)
mask_roi_aligner = tfm.vision.layers.MultilevelROIAligner(
crop_size=mask_roi_aligner_crop_size,
sample_offset=mask_roi_aligner_sample_offset,
)

mask_rcnn_model = tfm.vision.maskrcnn_model.MaskRCNNModel(
backbone=backbone,
decoder=decoder,
rpn_head=rpn_head,
detection_head=detection_head,
roi_generator=roi_generator,
roi_sampler=roi_sampler,
roi_aligner=roi_aligner,
detection_generator=detection_generator,
mask_head=mask_head,
mask_sampler=mask_sampler,
mask_roi_aligner=mask_roi_aligner,
class_agnostic_bbox_pred=detection_class_agnostic_bbox_pred,
cascade_class_ensemble=False,
min_level=min_level,
max_level=max_level,
num_scales=anchor_num_scales,
aspect_ratios=anchor_aspect_ratios,
anchor_size=anchor_size,
)

# TODO(scottzhu): rewrite the model.train/eval functions.
# See http://google3/third_party/tensorflow_models/official/vision/tasks/maskrcnn.py;l=47;rcl=468079502
return mask_rcnn_model
69 changes: 69 additions & 0 deletions keras_cv/models/object_detection/mask_rcnn/mask_rcnn_test.py
@@ -0,0 +1,69 @@
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import tensorflow as tf
import tensorflow_models as tfm

import keras_cv


class MaskRCNNTest(tf.test.TestCase):

@pytest.fixture(autouse=True)
def cleanup_global_session(self):
# Code before yield runs before the test
yield
tf.keras.backend.clear_session()

def test_maskrcnn_construction(self):
mask_rcnn_model = keras_cv.models.mask_rcnn(
classes=91, # default value for coco
input_size=[512, 512, 3],

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the input_size parameter necessary ?
Usually in Keras models, this gets built from the data.

backbone="resnet",
backbone_weights=None,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume "resnet" here refers to Model Gaden's ResNet ?
I tried passing "spinenet_mobile" but got an error.

Copy link
Contributor

@bhack bhack Sep 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently supported_premade_backbone = ['resnet', 'spinenet', 'mobilenet']

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried "spinenet" but it errors out:

decoder = tfm.vision.decoders.FPN(
--> 129 input_specs=backbone.output_specs, # Might need to check for custom backbones
130 min_level=min_level,
131 max_level=max_level,

AttributeError: 'str' object has no attribute 'output_specs'

Copy link
Contributor

@bhack bhack Sep 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

)
images = tf.random.uniform(shape=(2, 512, 512, 3))
image_shape = tf.constant([[512, 512], [512, 512]])
anchor_generator = tfm.vision.anchor.build_anchor_generator(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To align with keras_cv, can a default anchor generator be provided when the user instantiates the model?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, for dome reason, tfm.vision.anchor does not work for me. Maybe the symbol is not exported ?

min_level=2,
max_level=6,
num_scales=1,
aspect_ratios=[0.5, 1.0, 2.0],
anchor_size=8)
anchor_boxes = anchor_generator(image_size=(512, 512))
gt_boxes = tf.constant([[[0, 0, 100, 100], [100, 100, 200, 200]],
[[200, 200, 300, 300], [300, 300, 400, 400]]])
gt_classes = tf.constant([[1, 2], [3, 4]])
gt_masks = tf.constant(1, shape=[2, 2, 512, 512])
output = mask_rcnn_model(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a single call. Does .fit() work?

images=images,
image_shape=image_shape,
anchor_boxes=anchor_boxes,
gt_boxes=gt_boxes,
gt_classes=gt_classes,
gt_masks=gt_masks,
training=True)
self.assertLen(output, 11)
self.assertEquals(output['backbone_features'].keys(), {'2', '3', '4', '5'})
self.assertEquals(output['decoder_features'].keys(), {'2', '3', '4', '5', '6'})
self.assertEquals(output['rpn_boxes'].keys(), {'2', '3', '4', '5', '6'})
self.assertEquals(output['rpn_scores'].keys(), {'2', '3', '4', '5', '6'})
self.assertEquals(output['class_targets'].shape, [2, 512])
self.assertEquals(output['box_targets'].shape, [2, 512, 4])
self.assertEquals(output['class_outputs'].shape, [2, 512, 91])
self.assertEquals(output['box_outputs'].shape, [2, 512, 364])
self.assertEquals(output['mask_class_targets'].shape, [2, 128])
self.assertEquals(output['mask_targets'].shape, [2, 128, 28, 28])
self.assertEquals(output['mask_outputs'].shape, [2, 128, 28, 28])
2 changes: 2 additions & 0 deletions setup.cfg
Expand Up @@ -11,6 +11,8 @@ filterwarnings =
ignore::RuntimeWarning
ignore::PendingDeprecationWarning
ignore::FutureWarning
# This is from Addons when test with tf-nightly
ignore:You are currently using a nightly version of TensorFlow:UserWarning
[flake8]
max-line-length = 88
max-doc-length = 200
Expand Down