tensorflow · mihaimaruseac · Oct 28, 2021 · Oct 26, 2021
diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc
@@ -72,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel {
                                                 &stats_summary_list));
     const int64_t num_buckets = stats_summary_list[0].dim_size(1);
     // Check for single logit: 1 gradient + 1 hessian value.
-    DCHECK_EQ(stats_summary_list[0].dim_size(2), 2);
+    OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2,
+                errors::InvalidArgument("stats_summary_list[0] must have "
+                                        "size 2 in dimension 2, obtained: ",
+                                        stats_summary_list[0].dim_size(2)));
     std::vector<TTypes<float, 3>::ConstTensor> stats_summary;
     stats_summary.reserve(stats_summary_list.size());
     for (const auto& tensor : stats_summary_list) {
@@ -275,8 +278,13 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
     const int32_t num_buckets = stats_summary_t->dim_size(2) - 1;
     const int32_t logits_dim = logits_dim_;
     const int32_t hessian_dim = stats_summary_t->dim_size(3) - logits_dim;
-    DCHECK_GT(hessian_dim, 0);
-    DCHECK_LE(hessian_dim, logits_dim * logits_dim);
+    OP_REQUIRES(context, hessian_dim > 0,
+                errors::InvalidArgument("hessian dim should be > 0, got ",
+                                        hessian_dim));
+    OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
+                errors::InvalidArgument(
+                    "hessian dim should be <= ", logits_dim * logits_dim,
+                    " but got: ", hessian_dim));
 
     const Tensor* l1_t;
     OP_REQUIRES_OK(context, context->input("l1", &l1_t));
@@ -624,8 +632,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel {
     const int32_t logits_dim = logits_dim_;
     const int32_t hessian_dim =
         stats_summaries_list[0].dim_size(3) - logits_dim;
-    DCHECK_GT(hessian_dim, 0);
-    DCHECK_LE(hessian_dim, logits_dim * logits_dim);
+    OP_REQUIRES(context, hessian_dim > 0,
+                errors::InvalidArgument("hessian dim should be > 0, got ",
+                                        hessian_dim));
+    OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
+                errors::InvalidArgument(
+                    "hessian dim should be <= ", logits_dim * logits_dim,
+                    " but got: ", hessian_dim));
 
     // Vector of stats_summaries; each element is stats for feature of shape
     // [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim].
@@ -1002,6 +1015,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
     const Tensor* node_id_range_t;
     OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t));
     const auto node_id_range = node_id_range_t->vec<int32>();
+    OP_REQUIRES(
+        context, node_id_range.size() == 2,
+        errors::InvalidArgument("node_id_range should have 2 entries, got: ",
+                                node_id_range.size()));
     const int32_t node_id_first = node_id_range(0);  // inclusive
     const int32_t node_id_last = node_id_range(1);   // exclusive
 
@@ -1075,6 +1092,11 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
                       "dims, the last value in stats_summary_shape, which was ",
                       stats_dims, ". At index (", idx,
                       ", 4), stats_summary_indices contains value ", stat_dim));
+      OP_REQUIRES(context, stat_dim >= 0,
+                  errors::InvalidArgument(
+                      "Stat dim, the sum of logits dim and hessian dim in "
+                      "stats_summary_indices, should be >= 0, which was ",
+                      stat_dim, " at index ", idx));
       std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert(
           FeatureMapIterator::value_type(feature_dim, BucketMap()));
       auto& b_map = f_insert_result.first->second;
@@ -1307,6 +1329,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel {
     const Tensor* gradients_t;
     OP_REQUIRES_OK(context, context->input("gradients", &gradients_t));
     const auto gradients = gradients_t->matrix<float>();
+    OP_REQUIRES(
+        context, node_ids.size() == gradients.dimension(0),
+        errors::InvalidArgument(
+            "node_ids size should match 0th dim of gradients. node ids "
+            "size: ",
+            node_ids.size(), ", gradients dim0: ", gradients.dimension(0)));
     // hessians
     const Tensor* hessians_t;
     OP_REQUIRES_OK(context, context->input("hessians", &hessians_t));
@@ -1376,6 +1404,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel {
     OP_REQUIRES_OK(context, context->input("gradients", &gradients_t));
     const auto gradients = gradients_t->matrix<float>();
 
+    OP_REQUIRES(
+        context, node_ids.size() == gradients.dimension(0),
+        errors::InvalidArgument(
+            "node_ids size should match 0th dim of gradients. node ids "
+            "size: ",
+            node_ids.size(), ", gradients dim0: ", gradients.dimension(0)));
+
     // hessians.
     const Tensor* hessians_t;
     OP_REQUIRES_OK(context, context->input("hessians", &hessians_t));
@@ -1406,6 +1441,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel {
 
     for (int i = 0; i < batch_size; ++i) {
       const int32_t node = node_ids(i);
+      OP_REQUIRES(context, node >= 0,
+                  errors::InvalidArgument(
+                      "node_ids ", i, "th entry should be >=0, got: ", node));
       for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) {
         const int32_t feature_value = feature(i, feature_dim);
         const int32_t bucket =
@@ -1612,7 +1650,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel {
     const int64_t stats_dims = logits_dims + hessians_dims;
     const int64_t num_sparse_entries = feature_indices_t->dim_size(0);
     const int32_t feature_dims = feature_shape(1);
-    DCHECK_LE(num_sparse_entries, batch_size * feature_dims);
+    OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims,
+                errors::InvalidArgument(
+                    "feature_indices dim0 should be <= gradients dim0 * "
+                    "feature_shape[1]. feature_indices dim0: ",
+                    num_sparse_entries, ", gradients dim0: ", batch_size,
+                    ", feature_shape[1]: ", feature_dims));
 
     // Aggregate statistics info to map.
     StatsPartitionMap stats_map;

diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py
@@ -21,9 +21,11 @@
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import boosted_trees_ops
+from tensorflow.python.ops import gen_boosted_trees_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import googletest
 
@@ -1669,6 +1671,199 @@ def testMakeStatsSummaryNumericalPrecisionMegaBatch(self):
     """Tests numeric precision."""
     self._verify_precision(length=50000000)
 
+  def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self):
+    node_id_range = [1, 2]
+    stats_summary_list = [[[[]]]]  # one [1, 1, 0] tensor: last dim 0, not 2
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [1.0]
+    min_node_weight = [1.17]
+    max_splits = 1
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature(
+          node_id_range=node_id_range,
+          stats_summary_list=stats_summary_list,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          max_splits=max_splits)
+
+  def testBoostedTreesCalculateBestFeatureSplitSecurity(self):
+    node_id_range = [1, 2]
+    stats_summary = [[[[]]]]  # shape [1, 1, 1, 0]: empty last dimension
+    split_type = 'equality'
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [1.0]
+    min_node_weight = [1.17]
+    logits_dimension = 5  # larger than the last dim of stats_summary (0)
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
+          node_id_range=node_id_range,
+          stats_summary=stats_summary,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          logits_dimension=logits_dimension,
+          split_type=split_type)
+
+  def testBoostedTreesCalculateBestFeatureSplitSecurity2(self):
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
+          node_id_range=[0, 8],
+          stats_summary=[[[[1.0], [2.0], [3.0]]]],  # shape [1, 1, 3, 1]
+          l1=[0.5],
+          l2=[0.5],
+          tree_complexity=[0.1],
+          min_node_weight=[1.0],
+          logits_dimension=8)  # larger than the last dim (1)
+
+  def testBoostedTreesCalculateBestFeatureSplitV2Security(self):
+    node_id_range = [1, 2]
+    stats_summaries_list = [[[[[]]]]]  # one tensor of shape [1, 1, 1, 0]
+    split_types = ['inequality']
+    candidate_feature_ids = [1, 2, 3, 4]
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [1.0]
+    min_node_weight = [1.17]
+    logits_dimension = 5  # larger than the last dim of the tensor (0)
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2(
+          node_id_range=node_id_range,
+          stats_summaries_list=stats_summaries_list,
+          split_types=split_types,
+          candidate_feature_ids=candidate_feature_ids,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          logits_dimension=logits_dimension)
+
+  def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self):
+    node_id_range = []  # empty: the kernel requires exactly 2 entries
+    stats_summary_indices = [[]]
+    stats_summary_values = [1.0]
+    stats_summary_shape = [1, 1, 1, 1]
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [0.5]
+    min_node_weight = [1.0]
+    logits_dimension = 3
+    split_type = 'inequality'
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
+          node_id_range=node_id_range,
+          stats_summary_indices=stats_summary_indices,
+          stats_summary_values=stats_summary_values,
+          stats_summary_shape=stats_summary_shape,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          logits_dimension=logits_dimension,
+          split_type=split_type)
+
+  def testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self):
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
+          node_id_range=[0, 1],
+          stats_summary_indices=[[0, -1, -1, -1], [1, 0, -1, 0], [1, 0, 0, -1]],
+          stats_summary_values=[0.1, 0.2, 0.3],  # indices above include -1
+          stats_summary_shape=[1, 1, 1, 1],
+          l1=[0.5],
+          l2=[0.5],
+          tree_complexity=[0.1],
+          min_node_weight=[1.0],
+          logits_dimension=1)
+
+  def testBoostedTreesMakeStatsSummarySecurity(self):
+    node_ids = [1, 2]
+    gradients = [[]]  # shape [1, 0]: dim0 (1) != len(node_ids) (2)
+    hessians = [[0.2], [0.1]]
+    bucketized_features_list = [[1], [2]]
+    max_splits = 3
+    num_buckets = 3
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_make_stats_summary(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          bucketized_features_list=bucketized_features_list,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
+  def testBoostedTreesMakeStatsSummarySecurity2(self):
+    node_ids = [1, 2, 3]  # 3 node ids ...
+    gradients = [[0.1], [0.2]]  # ... but only 2 gradient rows
+    hessians = [[0.2], [0.1]]
+    bucketized_features_list = [[1], [2]]
+    max_splits = 3
+    num_buckets = 3
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_make_stats_summary(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          bucketized_features_list=bucketized_features_list,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
+  def testBoostedTreesAggregateStatsSecurity(self):
+    node_ids = [1, 2]
+    gradients = [[]]  # shape [1, 0]: dim0 (1) != len(node_ids) (2)
+    hessians = [[100.0]]
+    feature = [[0, 0, 0]]
+    max_splits = 100
+    num_buckets = 100
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_aggregate_stats(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          feature=feature,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
+  def testBoostedTreesAggregateStatsSecurity2(self):
+    node_ids = [-10]  # negative node id; sizes match gradients dim0 (1)
+    gradients = [[0.0, 0.0]]
+    hessians = [[100.0]]
+    feature = [[0, 0, 0]]
+    max_splits = 100
+    num_buckets = 100
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      self.evaluate(
+          gen_boosted_trees_ops.boosted_trees_aggregate_stats(
+              node_ids=node_ids,
+              gradients=gradients,
+              hessians=hessians,
+              feature=feature,
+              max_splits=max_splits,
+              num_buckets=num_buckets))
+
+  def testBoostedTreesSparseAggregateStatsSecurity(self):
+    node_ids = []
+    gradients = [[1.0]]
+    hessians = [[100.0]]
+    feature_indices = [[0, 0, 0]]  # dim0 == 1, i.e. one sparse entry
+    feature_values = [0, 0, 0]
+    feature_shape = [0, 0, 0]  # feature_shape[1] == 0
+    max_splits = 100
+    num_buckets = 100
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          feature_indices=feature_indices,
+          feature_values=feature_values,
+          feature_shape=feature_shape,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
 
 class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest):
   """Tests multi-class/multi-regression for best splits using V2 op."""