Fixing security issues in boosted trees ops
PiperOrigin-RevId: 405669548
Change-Id: Iae224d240d1779bcc02405c2fff99785644fbd0d
rohan100jain authored and tensorflower-gardener committed Oct 26, 2021
1 parent e43dc10 commit 5c8c9a8
Showing 2 changed files with 244 additions and 6 deletions.
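The change replaces debug-only DCHECK_* assertions with OP_REQUIRES validations that return errors::InvalidArgument, so malformed inputs to the boosted trees ops are rejected at runtime in every build instead of reading out of bounds. A minimal sketch of what a caller now observes, mirroring testBoostedTreesAggregateStatsSecurity from the new tests below (assumes the raw op is invoked directly through gen_boosted_trees_ops, as the tests do):

# Malformed call: gradients is empty, so its first dimension does not match
# node_ids. The op now raises InvalidArgumentError (or ValueError during
# shape inference) instead of crashing the process.
from tensorflow.python.framework import errors
from tensorflow.python.ops import gen_boosted_trees_ops

try:
  gen_boosted_trees_ops.boosted_trees_aggregate_stats(
      node_ids=[1, 2],
      gradients=[[]],
      hessians=[[100.0]],
      feature=[[0, 0, 0]],
      max_splits=100,
      num_buckets=100)
except (errors.InvalidArgumentError, ValueError) as e:
  print('malformed input rejected:', e)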
55 changes: 49 additions & 6 deletions tensorflow/core/kernels/boosted_trees/stats_ops.cc
@@ -72,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel {
&stats_summary_list));
const int64_t num_buckets = stats_summary_list[0].dim_size(1);
// Check for single logit: 1 gradient + 1 hessian value.
DCHECK_EQ(stats_summary_list[0].dim_size(2), 2);
OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2,
errors::InvalidArgument("stats_summary_list[0] must have "
"exactly 2 dimensions, obtained: ",
stats_summary_list[0].dim_size(2)));
std::vector<TTypes<float, 3>::ConstTensor> stats_summary;
stats_summary.reserve(stats_summary_list.size());
for (const auto& tensor : stats_summary_list) {
@@ -275,8 +278,13 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
const int32_t num_buckets = stats_summary_t->dim_size(2) - 1;
const int32_t logits_dim = logits_dim_;
const int32_t hessian_dim = stats_summary_t->dim_size(3) - logits_dim;
DCHECK_GT(hessian_dim, 0);
DCHECK_LE(hessian_dim, logits_dim * logits_dim);
OP_REQUIRES(context, hessian_dim > 0,
errors::InvalidArgument("hessian dim should be < 0, got ",
hessian_dim));
OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
errors::InvalidArgument(
"hessian dim should be <= ", logits_dim * logits_dim,
" but got: ", hessian_dim));

const Tensor* l1_t;
OP_REQUIRES_OK(context, context->input("l1", &l1_t));
@@ -624,8 +632,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel {
const int32_t logits_dim = logits_dim_;
const int32_t hessian_dim =
stats_summaries_list[0].dim_size(3) - logits_dim;
DCHECK_GT(hessian_dim, 0);
DCHECK_LE(hessian_dim, logits_dim * logits_dim);
OP_REQUIRES(context, hessian_dim > 0,
errors::InvalidArgument("hessian dim should be < 0, got ",
hessian_dim));
OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
errors::InvalidArgument(
"hessian dim should be <= ", logits_dim * logits_dim,
" but got: ", hessian_dim));

// Vector of stats_summaries; each element is stats for feature of shape
// [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim].
@@ -1002,6 +1015,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
const Tensor* node_id_range_t;
OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t));
const auto node_id_range = node_id_range_t->vec<int32>();
OP_REQUIRES(
context, node_id_range.size() == 2,
errors::InvalidArgument("node_id_range should have 2 entries, got: ",
node_id_range.size()));
const int32_t node_id_first = node_id_range(0); // inclusive
const int32_t node_id_last = node_id_range(1); // exclusive

@@ -1075,6 +1092,11 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
"dims, the last value in stats_summary_shape, which was ",
stats_dims, ". At index (", idx,
", 4), stats_summary_indices contains value ", stat_dim));
OP_REQUIRES(context, stat_dim >= 0,
errors::InvalidArgument(
"Stat dim, the sum of logits dim and hessian dim in "
"stats_summary_indices, should be >= 0, which was ",
stat_dim, " at index ", idx));
std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert(
FeatureMapIterator::value_type(feature_dim, BucketMap()));
auto& b_map = f_insert_result.first->second;
@@ -1307,6 +1329,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel {
const Tensor* gradients_t;
OP_REQUIRES_OK(context, context->input("gradients", &gradients_t));
const auto gradients = gradients_t->matrix<float>();
OP_REQUIRES(
context, node_ids.size() == gradients.dimension(0),
errors::InvalidArgument(
"node_ids size should match 0th dim of gradients. node ids "
"size: ",
node_ids.size(), ", gradients dim0: ", gradients.dimension(0)));
// hessians
const Tensor* hessians_t;
OP_REQUIRES_OK(context, context->input("hessians", &hessians_t));
@@ -1376,6 +1404,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel {
OP_REQUIRES_OK(context, context->input("gradients", &gradients_t));
const auto gradients = gradients_t->matrix<float>();

OP_REQUIRES(
context, node_ids.size() == gradients.dimension(0),
errors::InvalidArgument(
"node_ids size should match 0th dim of gradients. node ids "
"size: ",
node_ids.size(), ", gradients dim0: ", gradients.dimension(0)));

// hessians.
const Tensor* hessians_t;
OP_REQUIRES_OK(context, context->input("hessians", &hessians_t));
@@ -1406,6 +1441,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel {

for (int i = 0; i < batch_size; ++i) {
const int32_t node = node_ids(i);
OP_REQUIRES(context, node >= 0,
errors::InvalidArgument(
"node_ids ", i, "th entry should be >=0, got: ", node));
for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) {
const int32_t feature_value = feature(i, feature_dim);
const int32_t bucket =
@@ -1612,7 +1650,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel {
const int64_t stats_dims = logits_dims + hessians_dims;
const int64_t num_sparse_entries = feature_indices_t->dim_size(0);
const int32_t feature_dims = feature_shape(1);
DCHECK_LE(num_sparse_entries, batch_size * feature_dims);
OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims,
errors::InvalidArgument(
"feature_indices dim0 should be <= gradients dim0 * "
"feature_shape[1]. features_indices dim0: ",
num_sparse_entries, " gradients dim0: ", batch_size,
", feature_shape[1]: ", feature_dims));

// Aggregate statistics info to map.
StatsPartitionMap stats_map;
195 changes: 195 additions & 0 deletions tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py
@@ -17,9 +17,11 @@

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import boosted_trees_ops
from tensorflow.python.ops import gen_boosted_trees_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.platform import googletest

@@ -1665,6 +1667,199 @@ def testMakeStatsSummaryNumericalPrecisionMegaBatch(self):
"""Tests numeric precision."""
self._verify_precision(length=50000000)

def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self):
node_id_range = [1, 2]
stats_summary_list = [[[[]]]]
l1 = [1.0]
l2 = [1.0]
tree_complexity = [1.0]
min_node_weight = [1.17]
max_splits = 1
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature(
node_id_range=node_id_range,
stats_summary_list=stats_summary_list,
l1=l1,
l2=l2,
tree_complexity=tree_complexity,
min_node_weight=min_node_weight,
max_splits=max_splits)

def testBoostedTreesCalculateBestFeatureSplitSecurity(self):
node_id_range = [1, 2]
stats_summary = [[[[]]]]
split_type = 'equality'
l1 = [1.0]
l2 = [1.0]
tree_complexity = [1.0]
min_node_weight = [1.17]
logits_dimension = 5
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
node_id_range=node_id_range,
stats_summary=stats_summary,
l1=l1,
l2=l2,
tree_complexity=tree_complexity,
min_node_weight=min_node_weight,
logits_dimension=logits_dimension,
split_type=split_type)

def testBoostedTreesCalculateBestFeatureSplitSecurity2(self):
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
node_id_range=[0, 8],
stats_summary=[[[[1.0], [2.0], [3.0]]]],
l1=[0.5],
l2=[0.5],
tree_complexity=[0.1],
min_node_weight=[1.0],
logits_dimension=8)

def testBoostedTreesCalculateBestFeatureSplitV2Security(self):
node_id_range = [1, 2]
stats_summaries_list = [[[[[]]]]]
split_types = ['inequality']
candidate_feature_ids = [1, 2, 3, 4]
l1 = [1.0]
l2 = [1.0]
tree_complexity = [1.0]
min_node_weight = [1.17]
logits_dimension = 5
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2(
node_id_range=node_id_range,
stats_summaries_list=stats_summaries_list,
split_types=split_types,
candidate_feature_ids=candidate_feature_ids,
l1=l1,
l2=l2,
tree_complexity=tree_complexity,
min_node_weight=min_node_weight,
logits_dimension=logits_dimension)

def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self):
node_id_range = []
stats_summary_indices = [[]]
stats_summary_values = [1.0]
stats_summary_shape = [1, 1, 1, 1]
l1 = [1.0]
l2 = [1.0]
tree_complexity = [0.5]
min_node_weight = [1.0]
logits_dimension = 3
split_type = 'inequality'
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
node_id_range=node_id_range,
stats_summary_indices=stats_summary_indices,
stats_summary_values=stats_summary_values,
stats_summary_shape=stats_summary_shape,
l1=l1,
l2=l2,
tree_complexity=tree_complexity,
min_node_weight=min_node_weight,
logits_dimension=logits_dimension,
split_type=split_type)

def testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self):
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
node_id_range=[0, 1],
stats_summary_indices=[[0, -1, -1, -1], [1, 0, -1, 0], [1, 0, 0, -1]],
stats_summary_values=[0.1, 0.2, 0.3],
stats_summary_shape=[1, 1, 1, 1],
l1=[0.5],
l2=[0.5],
tree_complexity=[0.1],
min_node_weight=[1.0],
logits_dimension=1)

def testBoostedTreesMakeStatsSummarySecurity(self):
node_ids = [1, 2]
gradients = [[]]
hessians = [[0.2], [0.1]]
bucketized_features_list = [[1], [2]]
max_splits = 3
num_buckets = 3
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_make_stats_summary(
node_ids=node_ids,
gradients=gradients,
hessians=hessians,
bucketized_features_list=bucketized_features_list,
max_splits=max_splits,
num_buckets=num_buckets)

def testBoostedTreesMakeStatsSummarySecurity2(self):
node_ids = [1, 2, 3]
gradients = [[0.1], [0.2]]
hessians = [[0.2], [0.1]]
bucketized_features_list = [[1], [2]]
max_splits = 3
num_buckets = 3
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_make_stats_summary(
node_ids=node_ids,
gradients=gradients,
hessians=hessians,
bucketized_features_list=bucketized_features_list,
max_splits=max_splits,
num_buckets=num_buckets)

def testBoostedTreesAggregateStatsSecurity(self):
node_ids = [1, 2]
gradients = [[]]
hessians = [[100.0]]
feature = [[0, 0, 0]]
max_splits = 100
num_buckets = 100
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_aggregate_stats(
node_ids=node_ids,
gradients=gradients,
hessians=hessians,
feature=feature,
max_splits=max_splits,
num_buckets=num_buckets)

def testBoostedTreesAggregateStatsSecurity2(self):
node_ids = [-10]
gradients = [[0.0, 0.0]]
hessians = [[100.0]]
feature = [[0, 0, 0]]
max_splits = 100
num_buckets = 100
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
self.evaluate(
gen_boosted_trees_ops.boosted_trees_aggregate_stats(
node_ids=node_ids,
gradients=gradients,
hessians=hessians,
feature=feature,
max_splits=max_splits,
num_buckets=num_buckets))

def testBoostedTreesSparseAggregateStatsSecurity(self):
node_ids = []
gradients = [[1.0]]
hessians = [[100.0]]
feature_indices = [[0, 0, 0]]
feature_values = [0, 0, 0]
feature_shape = [0, 0, 0]
max_splits = 100
num_buckets = 100
with self.assertRaises((errors.InvalidArgumentError, ValueError)):
gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats(
node_ids=node_ids,
gradients=gradients,
hessians=hessians,
feature_indices=feature_indices,
feature_values=feature_values,
feature_shape=feature_shape,
max_splits=max_splits,
num_buckets=num_buckets)


class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest):
"""Tests multi-class/multi-regression for best splits using V2 op."""