Merged
Changes from all commits
43 commits
7c6b369
Force cast of num_classes to integer
pkulzc Mar 8, 2018
db52a98
Updating config util to allow overwriting of cosine decay learning ra…
pkulzc Mar 8, 2018
21d5aa1
Make box_list_ops.py and box_list_ops_test.py work with C API enabled.
pkulzc Mar 9, 2018
395ba99
Fix parallel event file writing issue.
pkulzc Mar 9, 2018
9ae3ca2
Deprecating the boolean flag of from_detection_checkpoint.
pkulzc Mar 9, 2018
99bc18e
Automated g4 rollback of changelist 188502560
pkulzc Mar 9, 2018
4acb687
Introducing eval metrics specs for Coco Mask metrics. This allows met…
pkulzc Mar 9, 2018
5c27adb
Minor fix to make object_detection/metrics/coco_evaluation.py python3…
pkulzc Mar 9, 2018
13359fb
Updating eval_util to handle eval_metric_ops from multiple `Detection…
pkulzc Mar 10, 2018
863c966
Allow tensor input for new_height and new_width for resize_image.
pkulzc Mar 10, 2018
7809eaf
Fix typo in fine_tune_checkpoint_type name in trainer.
pkulzc Mar 12, 2018
75d5ebd
Adding mobilenet feature extractor to object detection.
pkulzc Mar 13, 2018
3c63f93
Allow label maps to optionally contain an explicit background class w…
pkulzc Mar 13, 2018
3852b00
Fix boundary conditions in random_pad_to_aspect_ratio to ensure that …
pkulzc Mar 14, 2018
9749e7a
Fallback on from_detection_checkpoint option if fine_tune_checkpoint_…
pkulzc Mar 14, 2018
d3e2683
Add proper names for learning rate schedules so we don't see cryptic …
pkulzc Mar 14, 2018
b06e1f5
Enforcing that all datasets are batched (and then unbatched in the mo…
pkulzc Mar 15, 2018
e0d39fd
Adding regularization to total loss returned from DetectionModel.loss().
pkulzc Mar 15, 2018
8c156eb
Standardize the names of loss scalars (for SSD, Faster R-CNN and R-FC…
pkulzc Mar 15, 2018
e57d09c
Remove negative test from box list ops test.
pkulzc Mar 15, 2018
fb89176
Add an option to warmup learning rate in manual stepping schedule.
pkulzc Mar 16, 2018
6087749
Replace tf.contrib.slim.tfexample_decoder.LookupTensor with object_de…
pkulzc Mar 16, 2018
33ccf05
Force regularization summary variables under specific family names.
pkulzc Mar 16, 2018
8cbb3a8
Automated g4 rollback of changelist 188619139
pkulzc Mar 16, 2018
332061a
Remove step 0 schedule since we do a hard check for it after cl/18936…
pkulzc Mar 16, 2018
5a44ff9
PiperOrigin-RevId: 189040463
sguada Mar 14, 2018
a697a43
PiperOrigin-RevId: 189059229
sguada Mar 14, 2018
503e368
PiperOrigin-RevId: 189214402
sguada Mar 15, 2018
69438b4
Force regularization summary variables under specific family names.
sguada Mar 16, 2018
8a38dd6
Automated g4 rollback of changelist 188619139
sguada Mar 16, 2018
6538fd1
Make slim python3 compatible.
pkulzc Mar 16, 2018
973caef
Minor fixes.
pkulzc Mar 19, 2018
03d35c5
Add TargetAssignment summaries in a separate family.
pkulzc Mar 16, 2018
51d537f
1. Setting `family` keyword arg prepends the summary names twice with…
pkulzc Mar 17, 2018
323d7da
Minor fixes to make object detection tf 1.4 compatible.
pkulzc Mar 17, 2018
baaa32a
Call the base of mobilenet_v1 feature extractor under the right arg s…
pkulzc Mar 17, 2018
73e0aee
Automated g4 rollback of changelist 188409792
pkulzc Mar 17, 2018
e565c2c
Update object detection syncing.
pkulzc Mar 19, 2018
829979e
Add an option to warmup learning rate, hold it constant for a certain…
pkulzc Mar 19, 2018
6344949
Let the proposal feature extractor function in faster_rcnn meta archi…
pkulzc Mar 19, 2018
113141f
Fixed bug which caused masks to be mostly zeros (caused by detection_…
pkulzc Mar 19, 2018
4780572
Open sourcing Mobilenetv2 + SSDLite.
pkulzc Mar 19, 2018
92cdafc
Remove unused files.
pkulzc Mar 21, 2018
64 changes: 36 additions & 28 deletions research/object_detection/builders/dataset_builder.py
@@ -30,8 +30,8 @@
from object_detection.utils import dataset_util


def _get_padding_shapes(dataset, max_num_boxes, num_classes,
spatial_image_shape):
def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
spatial_image_shape=None):
"""Returns shapes to pad dataset tensors to before batching.

Args:
@@ -41,23 +41,28 @@ def _get_padding_shapes(dataset, max_num_boxes, num_classes,
num_classes: Number of classes in the dataset needed to compute shapes for
padding.
spatial_image_shape: A list of two integers of the form [height, width]
containing expected spatial shape of the imaage.
containing expected spatial shape of the image.

Returns:
A dictionary keyed by fields.InputDataFields containing padding shapes for
tensors in the dataset.

Raises:
ValueError: If groundtruth classes is neither rank 1 nor rank 2.
"""
height, width = spatial_image_shape

if not spatial_image_shape or spatial_image_shape == [-1, -1]:
height, width = None, None
else:
height, width = spatial_image_shape # pylint: disable=unpacking-non-sequence

padding_shapes = {
fields.InputDataFields.image: [height, width, 3],
fields.InputDataFields.source_id: [],
fields.InputDataFields.filename: [],
fields.InputDataFields.key: [],
fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
fields.InputDataFields.groundtruth_classes: [
max_num_boxes, num_classes
],
fields.InputDataFields.groundtruth_instance_masks: [max_num_boxes, height,
width],
fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
@@ -69,6 +74,21 @@ def _get_padding_shapes(dataset, max_num_boxes, num_classes,
fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
fields.InputDataFields.true_image_shape: [3]
}
# Determine whether groundtruth_classes are integers or one-hot encodings, and
# apply batching appropriately.
classes_shape = dataset.output_shapes[
fields.InputDataFields.groundtruth_classes]
if len(classes_shape) == 1: # Class integers.
padding_shapes[fields.InputDataFields.groundtruth_classes] = [max_num_boxes]
elif len(classes_shape) == 2: # One-hot or k-hot encoding.
padding_shapes[fields.InputDataFields.groundtruth_classes] = [
max_num_boxes, num_classes]
else:
raise ValueError('Groundtruth classes must be a rank 1 tensor (classes) or '
'rank 2 tensor (one-hot encodings)')

if fields.InputDataFields.original_image in dataset.output_shapes:
padding_shapes[fields.InputDataFields.original_image] = [None, None, 3]
if fields.InputDataFields.groundtruth_keypoints in dataset.output_shapes:
tensor_shape = dataset.output_shapes[fields.InputDataFields.
groundtruth_keypoints]
@@ -87,37 +107,32 @@ def _get_padding_shapes(dataset, max_num_boxes, num_classes,


def build(input_reader_config, transform_input_data_fn=None,
batch_size=1, max_num_boxes=None, num_classes=None,
batch_size=None, max_num_boxes=None, num_classes=None,
spatial_image_shape=None):
"""Builds a tf.data.Dataset.

Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
records. Optionally, if `batch_size` > 1 and `max_num_boxes`, `num_classes`
and `spatial_image_shape` are not None, returns a padded batched
tf.data.Dataset.
records. Applies a padded batch to the resulting dataset.

Args:
input_reader_config: A input_reader_pb2.InputReader object.
transform_input_data_fn: Function to apply to all records, or None if
no extra decoding is required.
batch_size: Batch size. If not None, returns a padded batch dataset.
max_num_boxes: Max number of groundtruth boxes needed to computes shapes for
padding. This is only used if batch_size is greater than 1.
batch_size: Batch size. If None, batching is not performed.
max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
padding. If None, will use a dynamic shape.
num_classes: Number of classes in the dataset needed to compute shapes for
padding. This is only used if batch_size is greater than 1.
spatial_image_shape: a list of two integers of the form [height, width]
padding. If None, will use a dynamic shape.
spatial_image_shape: A list of two integers of the form [height, width]
containing expected spatial shape of the image after applying
transform_input_data_fn. This is needed to compute shapes for padding and
only used if batch_size is greater than 1.
transform_input_data_fn. If None, will use dynamic shapes.

Returns:
A tf.data.Dataset based on the input_reader_config.

Raises:
ValueError: On invalid input reader proto.
ValueError: If no input paths are specified.
ValueError: If batch_size > 1 and any of (max_num_boxes, num_classes,
spatial_image_shape) is None.
"""
if not isinstance(input_reader_config, input_reader_pb2.InputReader):
raise ValueError('input_reader_config not of type '
@@ -147,14 +162,7 @@ def process_fn(value):
functools.partial(tf.data.TFRecordDataset, buffer_size=8 * 1000 * 1000),
process_fn, config.input_path[:], input_reader_config)

if batch_size > 1:
if num_classes is None:
raise ValueError('`num_classes` must be set when batch_size > 1.')
if max_num_boxes is None:
raise ValueError('`max_num_boxes` must be set when batch_size > 1.')
if spatial_image_shape is None:
raise ValueError('`spatial_image_shape` must be set when batch_size > '
'1 .')
if batch_size:
padding_shapes = _get_padding_shapes(dataset, max_num_boxes, num_classes,
spatial_image_shape)
dataset = dataset.apply(
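For context on the padded-batch path in this file, here is a minimal, self-contained sketch of how per-key padding shapes drive `tf.data` batching. It is illustrative only: the keys, the toy generator, and the use of `Dataset.padded_batch` are assumptions for exposition, not the exact code this PR lands.

```python
import numpy as np
import tensorflow as tf  # TF 1.x API, as used elsewhere in this PR

# Two records with different numbers of boxes. With rank-1 class labels
# (plain class indices), _get_padding_shapes would pad classes to
# [max_num_boxes]; with rank-2 one-hot labels it would use
# [max_num_boxes, num_classes].
def generator():
  yield {'groundtruth_boxes': np.zeros((2, 4), np.float32),
         'groundtruth_classes': np.array([1, 3], np.int64)}
  yield {'groundtruth_boxes': np.zeros((5, 4), np.float32),
         'groundtruth_classes': np.array([2, 2, 7, 1, 0], np.int64)}

dataset = tf.data.Dataset.from_generator(
    generator,
    output_types={'groundtruth_boxes': tf.float32,
                  'groundtruth_classes': tf.int64},
    output_shapes={'groundtruth_boxes': tf.TensorShape([None, 4]),
                   'groundtruth_classes': tf.TensorShape([None])})

# None in a padding shape means "pad to the longest element in the batch";
# a concrete value (e.g. max_num_boxes=10) gives fully static shapes.
padding_shapes = {'groundtruth_boxes': [10, 4], 'groundtruth_classes': [10]}
batched = dataset.padded_batch(2, padded_shapes=padding_shapes)
# Each batch then has boxes of shape (2, 10, 4) and classes of shape (2, 10).
```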
16 changes: 8 additions & 8 deletions research/object_detection/builders/dataset_builder_test.py
@@ -91,7 +91,7 @@ def test_build_tf_record_input_reader(self):
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_util.make_initializable_iterator(
dataset_builder.build(input_reader_proto)).get_next()
dataset_builder.build(input_reader_proto, batch_size=1)).get_next()

sv = tf.train.Supervisor(logdir=self.get_temp_dir())
with sv.prepare_or_wait_for_session() as sess:
@@ -100,15 +100,15 @@ def test_build_tf_record_input_reader(self):

self.assertTrue(
fields.InputDataFields.groundtruth_instance_masks not in output_dict)
self.assertEquals((4, 5, 3),
self.assertEquals((1, 4, 5, 3),
output_dict[fields.InputDataFields.image].shape)
self.assertEquals([2],
output_dict[fields.InputDataFields.groundtruth_classes])
self.assertAllEqual([[2]],
output_dict[fields.InputDataFields.groundtruth_classes])
self.assertEquals(
(1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
(1, 1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
self.assertAllEqual(
[0.0, 0.0, 1.0, 1.0],
output_dict[fields.InputDataFields.groundtruth_boxes][0])
output_dict[fields.InputDataFields.groundtruth_boxes][0][0])

def test_build_tf_record_input_reader_and_load_instance_masks(self):
tf_record_path = self.create_tf_record()
@@ -124,14 +124,14 @@ def test_build_tf_record_input_reader_and_load_instance_masks(self):
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_util.make_initializable_iterator(
dataset_builder.build(input_reader_proto)).get_next()
dataset_builder.build(input_reader_proto, batch_size=1)).get_next()

sv = tf.train.Supervisor(logdir=self.get_temp_dir())
with sv.prepare_or_wait_for_session() as sess:
sv.start_queue_runners(sess)
output_dict = sess.run(tensor_dict)
self.assertAllEqual(
(1, 4, 5),
(1, 1, 4, 5),
output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)

def test_build_tf_record_input_reader_with_batch_size_two(self):
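As a quick orientation to the updated expectations above, a hedged sketch of the new shape contract (it mirrors the tests rather than adding behavior): once `build` always batches, every tensor in the output dict gains a leading batch dimension, even for `batch_size=1`.

```python
# Illustrative only; `input_reader_proto` and `dataset_util` are the same
# objects set up in the tests above. The tables_initializer call assumes
# make_initializable_iterator registers its initializer as a table
# initializer, which is what lets the tests' Supervisor run it.
tensor_dict = dataset_util.make_initializable_iterator(
    dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  output_dict = sess.run(tensor_dict)

# Shapes before this PR -> after it (batch dimension prepended):
#   image:                       (4, 5, 3) -> (1, 4, 5, 3)
#   groundtruth_boxes:           (1, 4)    -> (1, 1, 4)
#   groundtruth_instance_masks:  (1, 4, 5) -> (1, 1, 4, 5)
```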
2 changes: 2 additions & 0 deletions research/object_detection/builders/model_builder.py
@@ -36,13 +36,15 @@
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
from object_detection.protos import model_pb2

# A map of names to SSD feature extractors.
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
'ssd_inception_v3': SSDInceptionV3FeatureExtractor,
'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
'ssd_mobilenet_v2': SSDMobileNetV2FeatureExtractor,
'ssd_resnet50_v1_fpn': ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor,
'ssd_resnet101_v1_fpn': ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor,
'ssd_resnet152_v1_fpn': ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor,
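For orientation, a minimal sketch of the lookup this map enables when an SSD config names its feature extractor. The constructor arguments shown are assumptions patterned on the existing SSD extractor interface, not a verbatim excerpt of the builder.

```python
from object_detection.builders import model_builder

# Hypothetical lookup, mirroring what the SSD builder does internally:
feature_type = 'ssd_mobilenet_v2'  # feature_extractor.type from the config
if feature_type not in model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP:
  raise ValueError('Unknown ssd feature_extractor: %s' % feature_type)
feature_extractor_class = model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP[
    feature_type]

conv_hyperparams = None  # stand-in; really built from the config's hyperparams
# Argument list is an assumption based on the SSD extractor interface:
feature_extractor = feature_extractor_class(
    is_training=True,
    depth_multiplier=1.0,
    min_depth=16,
    pad_to_multiple=1,
    conv_hyperparams=conv_hyperparams,
    batch_norm_trainable=True)
```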
76 changes: 76 additions & 0 deletions research/object_detection/builders/model_builder_test.py
@@ -31,6 +31,7 @@
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
from object_detection.protos import model_pb2

FRCNN_RESNET_FEAT_MAPS = {
@@ -368,6 +369,81 @@ def test_create_ssd_mobilenet_v1_model_from_config(self):
self.assertTrue(model._feature_extractor._batch_norm_trainable)
self.assertTrue(model._normalize_loc_loss_by_codesize)

def test_create_ssd_mobilenet_v2_model_from_config(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_mobilenet_v2'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
normalize_loc_loss_by_codesize: true
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV2FeatureExtractor)
self.assertTrue(model._feature_extractor._batch_norm_trainable)
self.assertTrue(model._normalize_loc_loss_by_codesize)

def test_create_embedded_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
10 changes: 6 additions & 4 deletions research/object_detection/builders/optimizer_builder.py
@@ -85,7 +85,8 @@ def _create_learning_rate(learning_rate_config):
learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
if learning_rate_type == 'constant_learning_rate':
config = learning_rate_config.constant_learning_rate
learning_rate = tf.constant(config.learning_rate, dtype=tf.float32)
learning_rate = tf.constant(config.learning_rate, dtype=tf.float32,
name='learning_rate')

if learning_rate_type == 'exponential_decay_learning_rate':
config = learning_rate_config.exponential_decay_learning_rate
@@ -94,7 +95,7 @@ def _create_learning_rate(learning_rate_config):
tf.train.get_or_create_global_step(),
config.decay_steps,
config.decay_factor,
staircase=config.staircase)
staircase=config.staircase, name='learning_rate')

if learning_rate_type == 'manual_step_learning_rate':
config = learning_rate_config.manual_step_learning_rate
@@ -105,7 +106,7 @@ def _create_learning_rate(learning_rate_config):
learning_rate_sequence += [x.learning_rate for x in config.schedule]
learning_rate = learning_schedules.manual_stepping(
tf.train.get_or_create_global_step(), learning_rate_step_boundaries,
learning_rate_sequence)
learning_rate_sequence, config.warmup)

if learning_rate_type == 'cosine_decay_learning_rate':
config = learning_rate_config.cosine_decay_learning_rate
@@ -114,7 +115,8 @@ def _create_learning_rate(learning_rate_config):
config.learning_rate_base,
config.total_steps,
config.warmup_learning_rate,
config.warmup_steps)
config.warmup_steps,
config.hold_base_rate_steps)

if learning_rate is None:
raise ValueError('Learning_rate %s not supported.' % learning_rate_type)
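To make the new `hold_base_rate_steps` argument concrete, here is an illustrative pure-Python rendering of the schedule's arithmetic (linear warmup, a hold phase at the base rate, then cosine decay). This is a sketch of the intended shape, not the library's exact implementation.

```python
import math

def cosine_with_warmup_and_hold(step, learning_rate_base, total_steps,
                                warmup_learning_rate, warmup_steps,
                                hold_base_rate_steps):
  """Sketch: linear warmup -> hold at base rate -> cosine decay to zero."""
  if step < warmup_steps:
    # Ramp linearly from warmup_learning_rate up to learning_rate_base.
    slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
    return warmup_learning_rate + slope * step
  if step < warmup_steps + hold_base_rate_steps:
    # New in this PR's config: hold the base rate before decaying.
    return learning_rate_base
  decay_steps = total_steps - warmup_steps - hold_base_rate_steps
  progress = float(step - warmup_steps - hold_base_rate_steps) / decay_steps
  return 0.5 * learning_rate_base * (1.0 + math.cos(math.pi * progress))

# For example (all numbers illustrative):
# cosine_with_warmup_and_hold(1200, 0.002, 20000, 0.0001, 1000, 500)
# -> 0.002, since step 1200 falls inside the 500-step hold window.
```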
7 changes: 6 additions & 1 deletion research/object_detection/builders/optimizer_builder_test.py
@@ -35,6 +35,7 @@ def testBuildConstantLearningRate(self):
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto)
self.assertTrue(learning_rate.op.name.endswith('learning_rate'))
with self.test_session():
learning_rate_out = learning_rate.eval()
self.assertAlmostEqual(learning_rate_out, 0.004)
@@ -52,19 +53,22 @@ def testBuildExponentialDecayLearningRate(self):
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto)
self.assertTrue(learning_rate.op.name.endswith('learning_rate'))
self.assertTrue(isinstance(learning_rate, tf.Tensor))

def testBuildManualStepLearningRate(self):
learning_rate_text_proto = """
manual_step_learning_rate {
initial_learning_rate: 0.002
schedule {
step: 0
step: 100
learning_rate: 0.006
}
schedule {
step: 90000
learning_rate: 0.00006
}
warmup: true
}
"""
learning_rate_proto = optimizer_pb2.LearningRate()
@@ -80,6 +84,7 @@ def testBuildCosineDecayLearningRate(self):
total_steps: 20000
warmup_learning_rate: 0.0001
warmup_steps: 1000
hold_base_rate_steps: 20000
}
"""
learning_rate_proto = optimizer_pb2.LearningRate()
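Similarly, a hedged pure-Python sketch of what `warmup: true` adds to the manual schedule exercised above: steps before the first boundary ramp linearly from the initial rate to the first scheduled rate, instead of holding the initial rate constant. The interpolation rule shown is an assumption for illustration.

```python
def manual_step_lr(step, boundaries, rates, warmup=False):
  """Sketch of a piecewise-constant schedule with optional linear warmup.

  boundaries=[100, 90000] and rates=[0.002, 0.006, 0.00006] mirror the
  manual_step_learning_rate proto in the test above.
  """
  if warmup and step < boundaries[0]:
    # Assumed warmup rule: interpolate rates[0] -> rates[1] over the
    # first boundary rather than holding rates[0].
    return rates[0] + (rates[1] - rates[0]) * float(step) / boundaries[0]
  learning_rate = rates[0]
  for boundary, rate in zip(boundaries, rates[1:]):
    if step >= boundary:
      learning_rate = rate
  return learning_rate

# manual_step_lr(50, [100, 90000], [0.002, 0.006, 0.00006], warmup=True)
# -> 0.004, halfway along the ramp from 0.002 to 0.006.
```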
15 changes: 0 additions & 15 deletions research/object_detection/core/box_list_ops_test.py
@@ -727,21 +727,6 @@ def test_concatenate_is_correct(self):

class NonMaxSuppressionTest(tf.test.TestCase):

def test_with_invalid_scores_field(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
iou_thresh = .5
max_output_size = 3
with self.assertRaisesWithPredicateMatch(ValueError,
'Dimensions must be equal'):
box_list_ops.non_max_suppression(boxes, iou_thresh, max_output_size)

def test_select_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],