Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing security fixes in boosted trees ops #52749

Merged
merged 1 commit into from Oct 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
55 changes: 49 additions & 6 deletions tensorflow/core/kernels/boosted_trees/stats_ops.cc
Expand Up @@ -72,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel {
&stats_summary_list));
const int64_t num_buckets = stats_summary_list[0].dim_size(1);
// Check for single logit: 1 gradient + 1 hessian value.
DCHECK_EQ(stats_summary_list[0].dim_size(2), 2);
// Single-logit case: dimension 2 of the stats summary must hold exactly two
// values (1 gradient + 1 hessian). The message reports the size of that
// dimension, not the tensor rank.
OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2,
            errors::InvalidArgument("stats_summary_list[0] must have "
                                    "exactly 2 elements in dimension 2, "
                                    "obtained: ",
                                    stats_summary_list[0].dim_size(2)));
std::vector<TTypes<float, 3>::ConstTensor> stats_summary;
stats_summary.reserve(stats_summary_list.size());
for (const auto& tensor : stats_summary_list) {
Expand Down Expand Up @@ -275,8 +278,13 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
const int32_t num_buckets = stats_summary_t->dim_size(2) - 1;
const int32_t logits_dim = logits_dim_;
const int32_t hessian_dim = stats_summary_t->dim_size(3) - logits_dim;
DCHECK_GT(hessian_dim, 0);
DCHECK_LE(hessian_dim, logits_dim * logits_dim);
// hessian_dim is stats_summary's dim 3 minus logits_dim; it must be strictly
// positive. The error text states the requirement that is actually enforced.
OP_REQUIRES(context, hessian_dim > 0,
            errors::InvalidArgument("hessian dim should be > 0, got ",
                                    hessian_dim));
OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
errors::InvalidArgument(
"hessian dim should be <= ", logits_dim * logits_dim,
" but got: ", hessian_dim));

const Tensor* l1_t;
OP_REQUIRES_OK(context, context->input("l1", &l1_t));
Expand Down Expand Up @@ -624,8 +632,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel {
const int32_t logits_dim = logits_dim_;
const int32_t hessian_dim =
stats_summaries_list[0].dim_size(3) - logits_dim;
DCHECK_GT(hessian_dim, 0);
DCHECK_LE(hessian_dim, logits_dim * logits_dim);
// hessian_dim is stats_summaries_list[0]'s dim 3 minus logits_dim; it must be
// strictly positive. The error text states the requirement that is actually
// enforced.
OP_REQUIRES(context, hessian_dim > 0,
            errors::InvalidArgument("hessian dim should be > 0, got ",
                                    hessian_dim));
OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
errors::InvalidArgument(
"hessian dim should be <= ", logits_dim * logits_dim,
" but got: ", hessian_dim));

// Vector of stats_summaries; each element is stats for feature of shape
// [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim].
Expand Down Expand Up @@ -1002,6 +1015,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
const Tensor* node_id_range_t;
OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t));
const auto node_id_range = node_id_range_t->vec<int32>();
OP_REQUIRES(
context, node_id_range.size() == 2,
errors::InvalidArgument("node_id_range should have 2 entries, got: ",
node_id_range.size()));
const int32_t node_id_first = node_id_range(0); // inclusive
const int32_t node_id_last = node_id_range(1); // exclusive

Expand Down Expand Up @@ -1075,6 +1092,11 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
"dims, the last value in stats_summary_shape, which was ",
stats_dims, ". At index (", idx,
", 4), stats_summary_indices contains value ", stat_dim));
OP_REQUIRES(context, stat_dim >= 0,
errors::InvalidArgument(
"Stat dim, the sum of logits dim and hessian dim in "
"stats_summary_indices, should be >= 0, which was ",
stat_dim, " at index ", idx));
std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert(
FeatureMapIterator::value_type(feature_dim, BucketMap()));
auto& b_map = f_insert_result.first->second;
Expand Down Expand Up @@ -1307,6 +1329,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel {
const Tensor* gradients_t;
OP_REQUIRES_OK(context, context->input("gradients", &gradients_t));
const auto gradients = gradients_t->matrix<float>();
OP_REQUIRES(
context, node_ids.size() == gradients.dimension(0),
errors::InvalidArgument(
"node_ids size should match 0th dim of gradients. node ids "
"size: ",
node_ids.size(), ", gradients dim0: ", gradients.dimension(0)));
// hessians
const Tensor* hessians_t;
OP_REQUIRES_OK(context, context->input("hessians", &hessians_t));
Expand Down Expand Up @@ -1376,6 +1404,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel {
OP_REQUIRES_OK(context, context->input("gradients", &gradients_t));
const auto gradients = gradients_t->matrix<float>();

OP_REQUIRES(
context, node_ids.size() == gradients.dimension(0),
errors::InvalidArgument(
"node_ids size should match 0th dim of gradients. node ids "
"size: ",
node_ids.size(), ", gradients dim0: ", gradients.dimension(0)));

// hessians.
const Tensor* hessians_t;
OP_REQUIRES_OK(context, context->input("hessians", &hessians_t));
Expand Down Expand Up @@ -1406,6 +1441,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel {

for (int i = 0; i < batch_size; ++i) {
const int32_t node = node_ids(i);
OP_REQUIRES(context, node >= 0,
errors::InvalidArgument(
"node_ids ", i, "th entry should be >=0, got: ", node));
for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) {
const int32_t feature_value = feature(i, feature_dim);
const int32_t bucket =
Expand Down Expand Up @@ -1612,7 +1650,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel {
const int64_t stats_dims = logits_dims + hessians_dims;
const int64_t num_sparse_entries = feature_indices_t->dim_size(0);
const int32_t feature_dims = feature_shape(1);
DCHECK_LE(num_sparse_entries, batch_size * feature_dims);
// The number of sparse entries (feature_indices dim 0) may not exceed
// batch_size * feature_dims. The message uses the input name
// `feature_indices` consistently.
OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims,
            errors::InvalidArgument(
                "feature_indices dim0 should be <= gradients dim0 * "
                "feature_shape[1]. feature_indices dim0: ",
                num_sparse_entries, " gradients dim0: ", batch_size,
                ", feature_shape[1]: ", feature_dims));

// Aggregate statistics info to map.
StatsPartitionMap stats_map;
Expand Down
195 changes: 195 additions & 0 deletions tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py
Expand Up @@ -21,9 +21,11 @@

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import boosted_trees_ops
from tensorflow.python.ops import gen_boosted_trees_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.platform import googletest

Expand Down Expand Up @@ -1669,6 +1671,199 @@ def testMakeStatsSummaryNumericalPrecisionMegaBatch(self):
"""Tests numeric precision."""
self._verify_precision(length=50000000)

def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self):
  """Expects an error when the stats summary has an empty innermost dim."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature(
        node_id_range=[1, 2],
        stats_summary_list=[[[[]]]],
        l1=[1.0],
        l2=[1.0],
        tree_complexity=[1.0],
        min_node_weight=[1.17],
        max_splits=1)

def testBoostedTreesCalculateBestFeatureSplitSecurity(self):
  """Expects an error for an empty stats_summary with logits_dimension=5."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
        node_id_range=[1, 2],
        stats_summary=[[[[]]]],
        l1=[1.0],
        l2=[1.0],
        tree_complexity=[1.0],
        min_node_weight=[1.17],
        logits_dimension=5,
        split_type='equality')

def testBoostedTreesCalculateBestFeatureSplitSecurity2(self):
  """Expects an error when the last stats dim (1) is below logits_dimension (8)."""
  node_id_range = [0, 8]
  stats_summary = [[[[1.0], [2.0], [3.0]]]]
  l1 = [0.5]
  l2 = [0.5]
  tree_complexity = [0.1]
  min_node_weight = [1.0]
  logits_dimension = 8
  with self.assertRaises((errors.InvalidArgumentError, ValueError)):
    gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
        node_id_range=node_id_range,
        stats_summary=stats_summary,
        l1=l1,
        l2=l2,
        tree_complexity=tree_complexity,
        min_node_weight=min_node_weight,
        logits_dimension=logits_dimension)

def testBoostedTreesCalculateBestFeatureSplitV2Security(self):
  """Expects an error for an empty stats summary passed to the V2 split op."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2(
        node_id_range=[1, 2],
        stats_summaries_list=[[[[[]]]]],
        split_types=['inequality'],
        candidate_feature_ids=[1, 2, 3, 4],
        l1=[1.0],
        l2=[1.0],
        tree_complexity=[1.0],
        min_node_weight=[1.17],
        logits_dimension=5)

def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self):
  """Expects an error for an empty node_id_range and empty summary indices."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
        node_id_range=[],
        stats_summary_indices=[[]],
        stats_summary_values=[1.0],
        stats_summary_shape=[1, 1, 1, 1],
        l1=[1.0],
        l2=[1.0],
        tree_complexity=[0.5],
        min_node_weight=[1.0],
        logits_dimension=3,
        split_type='inequality')

def testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self):
  """Expects an error when stats_summary_indices contains negative entries."""
  node_id_range = [0, 1]
  stats_summary_indices = [[0, -1, -1, -1], [1, 0, -1, 0], [1, 0, 0, -1]]
  stats_summary_values = [0.1, 0.2, 0.3]
  stats_summary_shape = [1, 1, 1, 1]
  l1 = [0.5]
  l2 = [0.5]
  tree_complexity = [0.1]
  min_node_weight = [1.0]
  logits_dimension = 1
  with self.assertRaises((errors.InvalidArgumentError, ValueError)):
    gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
        node_id_range=node_id_range,
        stats_summary_indices=stats_summary_indices,
        stats_summary_values=stats_summary_values,
        stats_summary_shape=stats_summary_shape,
        l1=l1,
        l2=l2,
        tree_complexity=tree_complexity,
        min_node_weight=min_node_weight,
        logits_dimension=logits_dimension)

def testBoostedTreesMakeStatsSummarySecurity(self):
  """Expects an error when gradients is empty but two node_ids are given."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_make_stats_summary(
        node_ids=[1, 2],
        gradients=[[]],
        hessians=[[0.2], [0.1]],
        bucketized_features_list=[[1], [2]],
        max_splits=3,
        num_buckets=3)

def testBoostedTreesMakeStatsSummarySecurity2(self):
  """Expects an error when three node_ids are paired with two gradient rows."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_make_stats_summary(
        node_ids=[1, 2, 3],
        gradients=[[0.1], [0.2]],
        hessians=[[0.2], [0.1]],
        bucketized_features_list=[[1], [2]],
        max_splits=3,
        num_buckets=3)

def testBoostedTreesAggregateStatsSecurity(self):
  """Expects an error when two node_ids are paired with an empty gradients row."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_aggregate_stats(
        node_ids=[1, 2],
        gradients=[[]],
        hessians=[[100.0]],
        feature=[[0, 0, 0]],
        max_splits=100,
        num_buckets=100)

def testBoostedTreesAggregateStatsSecurity2(self):
  """Expects an error when node_ids contains a negative id (-10)."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    # Both building and evaluating the op happen inside the assertRaises
    # scope, so an error from either step is accepted.
    aggregated = gen_boosted_trees_ops.boosted_trees_aggregate_stats(
        node_ids=[-10],
        gradients=[[0.0, 0.0]],
        hessians=[[100.0]],
        feature=[[0, 0, 0]],
        max_splits=100,
        num_buckets=100)
    self.evaluate(aggregated)

def testBoostedTreesSparseAggregateStatsSecurity(self):
  """Expects an error for empty node_ids and an all-zero feature_shape."""
  expected_errors = (errors.InvalidArgumentError, ValueError)
  with self.assertRaises(expected_errors):
    gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats(
        node_ids=[],
        gradients=[[1.0]],
        hessians=[[100.0]],
        feature_indices=[[0, 0, 0]],
        feature_values=[0, 0, 0],
        feature_shape=[0, 0, 0],
        max_splits=100,
        num_buckets=100)


class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest):
"""Tests multi-class/multi-regression for best splits using V2 op."""
Expand Down