diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc index 60c1d191f5232c..fe48695358b5d8 100644 --- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc @@ -72,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { &stats_summary_list)); const int64_t num_buckets = stats_summary_list[0].dim_size(1); // Check for single logit: 1 gradient + 1 hessian value. - DCHECK_EQ(stats_summary_list[0].dim_size(2), 2); + OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2, + errors::InvalidArgument("stats_summary_list[0] must have " "last dimension of size 2, obtained: ", + stats_summary_list[0].dim_size(2))); std::vector<TTypes<float, 3>::ConstTensor> stats_summary; stats_summary.reserve(stats_summary_list.size()); for (const auto& tensor : stats_summary_list) { @@ -275,8 +278,13 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { const int32_t num_buckets = stats_summary_t->dim_size(2) - 1; const int32_t logits_dim = logits_dim_; const int32_t hessian_dim = stats_summary_t->dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument("hessian dim should be > 0, got ", + hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + "hessian dim should be <= ", logits_dim * logits_dim, + " but got: ", hessian_dim)); const Tensor* l1_t; OP_REQUIRES_OK(context, context->input("l1", &l1_t)); @@ -624,8 +632,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel { const int32_t logits_dim = logits_dim_; const int32_t hessian_dim = stats_summaries_list[0].dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument("hessian dim should be > 0, got ", + 
hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + "hessian dim should be <= ", logits_dim * logits_dim, + " but got: ", hessian_dim)); // Vector of stats_summaries; each element is stats for feature of shape // [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim]. @@ -1002,6 +1015,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t)); const auto node_id_range = node_id_range_t->vec<int32>(); + OP_REQUIRES( + context, node_id_range.size() == 2, + errors::InvalidArgument("node_id_range should have 2 entries, got: ", + node_id_range.size())); const int32_t node_id_first = node_id_range(0); // inclusive const int32_t node_id_last = node_id_range(1); // exclusive @@ -1075,6 +1092,11 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { "dims, the last value in stats_summary_shape, which was ", stats_dims, ". At index (", idx, ", 4), stats_summary_indices contains value ", stat_dim)); + OP_REQUIRES(context, stat_dim >= 0, + errors::InvalidArgument( + "Stat dim, the sum of logits dim and hessian dim in " + "stats_summary_indices, should be >= 0, which was ", + stat_dim, " at index ", idx)); std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert( FeatureMapIterator::value_type(feature_dim, BucketMap())); auto& b_map = f_insert_result.first->second; @@ -1307,6 +1329,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel { const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); const auto gradients = gradients_t->matrix<float>(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + "node_ids size should match 0th dim of gradients. 
node ids " + "size: ", + node_ids.size(), ", gradients dim0: ", gradients.dimension(0))); // hessians const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); @@ -1376,6 +1404,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel { OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); const auto gradients = gradients_t->matrix<float>(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + "node_ids size should match 0th dim of gradients. node ids " + "size: ", + node_ids.size(), ", gradients dim0: ", gradients.dimension(0))); + // hessians. const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); @@ -1406,6 +1441,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel { for (int i = 0; i < batch_size; ++i) { const int32_t node = node_ids(i); + OP_REQUIRES(context, node >= 0, + errors::InvalidArgument( + "node_ids ", i, "th entry should be >=0, got: ", node)); for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) { const int32_t feature_value = feature(i, feature_dim); const int32_t bucket = @@ -1612,7 +1650,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel { const int64_t stats_dims = logits_dims + hessians_dims; const int64_t num_sparse_entries = feature_indices_t->dim_size(0); const int32_t feature_dims = feature_shape(1); - DCHECK_LE(num_sparse_entries, batch_size * feature_dims); + OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims, + errors::InvalidArgument( + "feature_indices dim0 should be <= gradients dim0 * " + "feature_shape[1]. features_indices dim0: ", + num_sparse_entries, " gradients dim0: ", batch_size, + ", feature_shape[1]: ", feature_dims)); // Aggregate statistics info to map. 
StatsPartitionMap stats_map; diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py index 73098ed3084da6..2af570da73f815 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py @@ -21,9 +21,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import boosted_trees_ops +from tensorflow.python.ops import gen_boosted_trees_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import googletest @@ -1669,6 +1671,199 @@ def testMakeStatsSummaryNumericalPrecisionMegaBatch(self): """Tests numeric precision.""" self._verify_precision(length=50000000) + def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self): + node_id_range = [1, 2] + stats_summary_list = [[[[]]]] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + max_splits = 1 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature( + node_id_range=node_id_range, + stats_summary_list=stats_summary_list, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + max_splits=max_splits) + + def testBoostedTreesCalculateBestFeatureSplitSecurity(self): + node_id_range = [1, 2] + stats_summary = [[[[]]]] + split_type = 'equality' + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=node_id_range, + stats_summary=stats_summary, + l1=l1, + l2=l2, + 
tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def testBoostedTreesCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=[0, 8], + stats_summary=[[[[1.0], [2.0], [3.0]]]], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=8) + + def testBoostedTreesCalculateBestFeatureSplitV2Security(self): + node_id_range = [1, 2] + stats_summaries_list = [[[[[]]]]] + split_types = ['inequality'] + candidate_feature_ids = [1, 2, 3, 4] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2( + node_id_range=node_id_range, + stats_summaries_list=stats_summaries_list, + split_types=split_types, + candidate_feature_ids=candidate_feature_ids, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension) + + def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self): + node_id_range = [] + stats_summary_indices = [[]] + stats_summary_values = [1.0] + stats_summary_shape = [1, 1, 1, 1] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [0.5] + min_node_weight = [1.0] + logits_dimension = 3 + split_type = 'inequality' + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + node_id_range=node_id_range, + stats_summary_indices=stats_summary_indices, + stats_summary_values=stats_summary_values, + stats_summary_shape=stats_summary_shape, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def 
testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + node_id_range=[0, 1], + stats_summary_indices=[[0, -1, -1, -1], [1, 0, -1, 0], [1, 0, 0, -1]], + stats_summary_values=[0.1, 0.2, 0.3], + stats_summary_shape=[1, 1, 1, 1], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=1) + + def testBoostedTreesMakeStatsSummarySecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesMakeStatsSummarySecurity2(self): + node_ids = [1, 2, 3] + gradients = [[0.1], [0.2]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity2(self): + node_ids = [-10] + 
gradients = [[0.0, 0.0]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + self.evaluate( + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets)) + + def testBoostedTreesSparseAggregateStatsSecurity(self): + node_ids = [] + gradients = [[1.0]] + hessians = [[100.0]] + feature_indices = [[0, 0, 0]] + feature_values = [0, 0, 0] + feature_shape = [0, 0, 0] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature_indices=feature_indices, + feature_values=feature_values, + feature_shape=feature_shape, + max_splits=max_splits, + num_buckets=num_buckets) + class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest): """Tests multi-class/multi-regression for best splits using V2 op."""