Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Branch 170960975 #13484

Merged
merged 23 commits into from
Oct 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select a commit. Hold Shift and click to select a range.
d0c76cd
Handle the absence of a fresh eval checkpoint in `run_local`.
isaprykin Oct 3, 2017
5123f29
Internal cleanup.
tensorflower-gardener Oct 3, 2017
d6e963b
SYCL: Fix build breakage introduced in
asimshankar Oct 3, 2017
6810566
Internal change to simplify prediction ops.
tensorflower-gardener Oct 3, 2017
94463f5
Preserve target function signature in custom_gradient decorator
akshayka Oct 3, 2017
b002c8b
[Grappler] Fold chains of reshapes.
Oct 3, 2017
08e266d
Pass activity_regularizer to __init__ instead of using the (now
tensorflower-gardener Oct 3, 2017
b925f85
Fast-path for EagerTensorBase.dtype
alextp Oct 3, 2017
ad37fa8
Refactor ExportStrategies into Exporters.
isaprykin Oct 3, 2017
0c8dbc1
matmul uses shape_tuple internally
alextp Oct 4, 2017
0068086
Introduce `tf.data` namespace.
tensorflower-gardener Oct 4, 2017
4cf6126
Improve TFGAN documentation.
tensorflower-gardener Oct 4, 2017
b959da9
Fixing CPU implementation of parallel_stack for tensors with non-zero…
jsimsa Oct 4, 2017
add6d2d
[tf-signal] Use tf.spectral.dct in mfccs_from_log_mel_spectrograms in…
rryan Oct 4, 2017
d4ea993
Removes unnecessary eager-mode call to convert_to_tensor in record_gr…
alextp Oct 4, 2017
de14fcb
Support evaluation in `_TrainingExecutor.run_master()`.
isaprykin Oct 4, 2017
b395257
Added comment re:behavior of listener in case of multiple saver hooks.
ispirmustafa Oct 4, 2017
93fa1af
Make graph_callable, defun tf_decorators
akshayka Oct 4, 2017
664dd08
Disable cluster_function_library_runtime_test on Mac OS as it is curr…
Oct 4, 2017
c31c118
Extend tf.contrib.bijector API to handle some non-injective transforms.
langmore Oct 4, 2017
bfaaefa
Update APIs for TPU Cluster Resolver to remove the custom API definit…
Oct 4, 2017
f9f037c
Bugfix to LSTMBlockCell and friends: clipping is off by default.
ebrevdo Oct 4, 2017
3dbe216
Merge commit for internal changes
Oct 4, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 31 additions & 17 deletions tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,27 @@ const char* kApplyDropoutAttributeName = "apply_dropout";
const char* kApplyAveragingAttributeName = "apply_averaging";
const char* kDropoutInfoOutputTensorName = "drop_out_tree_indices_weights";
const char* kPredictionsTensorName = "predictions";
const char* kNoDropoutPredictionsTensorName = "no_dropout_predictions";

// Computes the sorted list of tree ids from [0, num_trees) that should
// participate in prediction.
//
// A tree is excluded when either:
//   * its id appears in `trees_to_drop` (which must be sorted ascending), or
//   * `only_finalized` is true, per-tree metadata is present, and the tree
//     is not yet finalized.
//
// Args:
//   config: ensemble proto; only tree_metadata is consulted here.
//   trees_to_drop: sorted ids of trees removed (e.g. by dropout).
//   num_trees: total number of trees in the ensemble.
//   only_finalized: whether non-finalized trees are skipped.
//   trees_to_include: output; filled with the surviving tree ids in order.
void CalculateTreesToInclude(
    const boosted_trees::trees::DecisionTreeEnsembleConfig& config,
    const std::vector<int32>& trees_to_drop, const int32 num_trees,
    const bool only_finalized, std::vector<int32>* trees_to_include) {
  trees_to_include->reserve(num_trees - trees_to_drop.size());

  // Cursor into the sorted `trees_to_drop` list pointing at the next
  // not-yet-matched dropped id. Using size_t avoids the signed/unsigned
  // comparison the previous int32 cursor incurred against size().
  size_t drop_index = 0;
  for (int32 tree = 0; tree < num_trees; ++tree) {
    // BUGFIX: only advance `drop_index` when the current tree actually
    // matches a dropped id. The previous code also incremented it when
    // skipping a non-finalized tree, desynchronizing the cursor so that
    // later dropped trees could be wrongly included.
    if (drop_index < trees_to_drop.size() &&
        trees_to_drop[drop_index] == tree) {
      ++drop_index;
      continue;
    }
    // Optionally skip trees that are not yet finalized.
    if (only_finalized && config.tree_metadata_size() > 0 &&
        !config.tree_metadata(tree).is_finalized()) {
      continue;
    }
    trees_to_include->push_back(tree);
  }
}
} // namespace

class GradientTreesPredictionOp : public OpKernel {
public:
Expand Down Expand Up @@ -226,6 +245,13 @@ class GradientTreesPredictionOp : public OpKernel {
weights, &dropped_trees, &original_weights));
}

// Prepare the list of trees to include in the prediction.
std::vector<int32> trees_to_include;
CalculateTreesToInclude(
ensemble_resource->decision_tree_ensemble(), dropped_trees,
ensemble_resource->decision_tree_ensemble().trees_size(),
only_finalized_trees_, &trees_to_include);

// Allocate output predictions matrix.
Tensor* output_predictions_t = nullptr;
OP_REQUIRES_OK(
Expand All @@ -234,44 +260,32 @@ class GradientTreesPredictionOp : public OpKernel {
&output_predictions_t));
auto output_predictions = output_predictions_t->matrix<float>();

Tensor* output_no_dropout_predictions_t = nullptr;
OP_REQUIRES_OK(
context, context->allocate_output(kNoDropoutPredictionsTensorName,
{batch_size, prediction_vector_size_},
&output_no_dropout_predictions_t));
auto output_no_dropout_predictions =
output_no_dropout_predictions_t->matrix<float>();

// Run predictor.
thread::ThreadPool* const worker_threads =
context->device()->tensorflow_cpu_worker_threads()->workers;

if (apply_averaging_) {
DecisionTreeEnsembleConfig adjusted =
ensemble_resource->decision_tree_ensemble();

const int start_averaging = std::max(
0.0,
averaging_config_.config_case() ==
AveragingConfig::kAverageLastNTreesFieldNumber
? adjusted.trees_size() - averaging_config_.average_last_n_trees()
: adjusted.trees_size() *
(1.0 - averaging_config_.average_last_percent_trees()));

const int num_ensembles = adjusted.trees_size() - start_averaging;
for (int i = start_averaging; i < adjusted.trees_size(); ++i) {
float weight = adjusted.tree_weights(i);
adjusted.mutable_tree_weights()->Set(
i, weight * (num_ensembles - i + start_averaging) / num_ensembles);
}
MultipleAdditiveTrees::Predict(
adjusted, only_finalized_trees_, dropped_trees, batch_features,
worker_threads, output_predictions, output_no_dropout_predictions);
MultipleAdditiveTrees::Predict(adjusted, trees_to_include, batch_features,
worker_threads, output_predictions);
} else {
MultipleAdditiveTrees::Predict(
ensemble_resource->decision_tree_ensemble(), only_finalized_trees_,
dropped_trees, batch_features, worker_threads, output_predictions,
output_no_dropout_predictions);
ensemble_resource->decision_tree_ensemble(), trees_to_include,
batch_features, worker_threads, output_predictions);
}

// Output dropped trees and original weights.
Expand Down
120 changes: 31 additions & 89 deletions tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,110 +21,52 @@ namespace tensorflow {
namespace boosted_trees {
namespace models {

namespace {
// Computes the sorted list of tree ids from [0, num_trees) to keep for
// prediction, excluding trees listed in `trees_to_drop` (sorted ascending)
// and, when `only_finalized` is set and metadata is present, trees that are
// not yet finalized.
//
// Args:
//   config: ensemble proto; only tree_metadata is consulted here.
//   trees_to_drop: sorted ids of trees removed (e.g. by dropout).
//   num_trees: total number of trees in the ensemble.
//   only_finalized: whether non-finalized trees are skipped.
//   trees_to_keep: output; filled with the surviving tree ids in order.
void CalculateTreesToKeep(
    const boosted_trees::trees::DecisionTreeEnsembleConfig& config,
    const std::vector<int32>& trees_to_drop, const int32 num_trees,
    const bool only_finalized, std::vector<int32>* trees_to_keep) {
  trees_to_keep->reserve(num_trees - trees_to_drop.size());

  // Cursor into the sorted `trees_to_drop` list pointing at the next
  // not-yet-matched dropped id. size_t avoids a signed/unsigned compare.
  size_t drop_index = 0;
  for (int32 tree = 0; tree < num_trees; ++tree) {
    // BUGFIX: only advance `drop_index` on an actual match with a dropped
    // id. The previous code also incremented it when skipping a
    // non-finalized tree, which could walk the cursor past pending drop
    // entries and let dropped trees leak into the kept set.
    if (drop_index < trees_to_drop.size() &&
        trees_to_drop[drop_index] == tree) {
      ++drop_index;
      continue;
    }
    // Optionally skip trees that are not yet finalized.
    if (only_finalized && config.tree_metadata_size() > 0 &&
        !config.tree_metadata(tree).is_finalized()) {
      continue;
    }
    trees_to_keep->push_back(tree);
  }
}

// Accumulates `value` into entry (index_1, index_2) of the primary
// prediction matrix, and into the optional secondary matrix when one is
// supplied (a null secondary matrix means "primary only").
void UpdatePredictions(
    const int32 index_1, const int32 index_2, const float value,
    tensorflow::TTypes<float>::Matrix* output_predictions,
    tensorflow::TTypes<float>::Matrix* additional_output_predictions) {
  auto& primary = *output_predictions;
  primary(index_1, index_2) += value;
  if (additional_output_predictions == nullptr) return;
  auto& secondary = *additional_output_predictions;
  secondary(index_1, index_2) += value;
}

// Adds the contribution of tree `tree_idx` for a single example to the
// output matrices.
//
// Traverses the tree to the example's leaf, then accumulates
// tree_weight * leaf_value into `output_predictions` (and into
// `additional_output_predictions` when non-null) via UpdatePredictions.
// Handles both sparse_vector and dense vector leaves; any other leaf type
// is a fatal error (QCHECK).
void UpdatePredictionsBasedOnTree(
    const boosted_trees::trees::DecisionTreeEnsembleConfig& config,
    const int32 tree_idx, const boosted_trees::utils::Example& example,
    tensorflow::TTypes<float>::Matrix* output_predictions,
    tensorflow::TTypes<float>::Matrix* additional_output_predictions) {
  const boosted_trees::trees::DecisionTreeConfig& tree = config.trees(tree_idx);
  const float tree_weight = config.tree_weights(tree_idx);
  const int leaf_idx = trees::DecisionTree::Traverse(tree, 0, example);
  QCHECK(leaf_idx >= 0) << "Invalid tree: " << tree.DebugString();
  const auto& leaf_node = tree.nodes(leaf_idx);
  QCHECK(leaf_node.has_leaf())
      << "Invalid leaf node: " << leaf_node.DebugString();
  if (leaf_node.leaf().has_sparse_vector()) {
    // Sparse leaf: parallel index/value arrays addressing output columns.
    const auto& leaf = leaf_node.leaf().sparse_vector();
    QCHECK_EQ(leaf.index_size(), leaf.value_size());
    // Proto repeated-field sizes are `int`; use a signed index to avoid the
    // implicit signed/unsigned comparison the previous size_t loop had.
    for (int class_idx = 0; class_idx < leaf.index_size(); ++class_idx) {
      const float value = tree_weight * leaf.value(class_idx);
      UpdatePredictions(example.example_idx, leaf.index(class_idx), value,
                        output_predictions, additional_output_predictions);
    }
  } else {
    // Dense leaf: one value per output column.
    QCHECK(leaf_node.leaf().has_vector()) << "Unknown leaf type";
    const auto& leaf = leaf_node.leaf().vector();
    for (int i = 0; i < leaf.value_size(); ++i) {
      const float value = tree_weight * leaf.value(i);
      UpdatePredictions(example.example_idx, i, value, output_predictions,
                        additional_output_predictions);
    }
  }
}

} // namespace

void MultipleAdditiveTrees::Predict(
const boosted_trees::trees::DecisionTreeEnsembleConfig& config,
const bool only_finalized_trees, const std::vector<int32>& trees_to_drop,
const std::vector<int32>& trees_to_include,
const boosted_trees::utils::BatchFeatures& features,
tensorflow::thread::ThreadPool* worker_threads,
tensorflow::TTypes<float>::Matrix output_predictions,
tensorflow::TTypes<float>::Matrix no_dropout_predictions) {
tensorflow::thread::ThreadPool* const worker_threads,
tensorflow::TTypes<float>::Matrix output_predictions) {
// Zero out predictions as the model is additive.
output_predictions.setZero();
no_dropout_predictions.setZero();

// Get batch size.
const int64 batch_size = features.batch_size();
if (batch_size <= 0) {
return;
}

// Prepare the list of trees to keep.
std::vector<int32> trees_to_keep;
CalculateTreesToKeep(config, trees_to_drop, config.trees_size(),
only_finalized_trees, &trees_to_keep);

// Lambda for doing a block of work.
auto update_predictions = [&config, &features, &trees_to_keep, &trees_to_drop,
&output_predictions,
&no_dropout_predictions](int64 start, int64 end) {
auto update_predictions = [&config, &features, &trees_to_include,
&output_predictions](int64 start, int64 end) {
auto examples_iterable = features.examples_iterable(start, end);
for (const auto& example : examples_iterable) {
for (const int32 tree_idx : trees_to_keep) {
UpdatePredictionsBasedOnTree(config, tree_idx, example,
&output_predictions,
&no_dropout_predictions);
}

// Now do predictions for dropped trees
for (const int32 tree_idx : trees_to_drop) {
UpdatePredictionsBasedOnTree(config, tree_idx, example,
&no_dropout_predictions, nullptr);
for (const int32 tree_idx : trees_to_include) {
const boosted_trees::trees::DecisionTreeConfig& tree =
config.trees(tree_idx);
const float tree_weight = config.tree_weights(tree_idx);
const int leaf_idx = trees::DecisionTree::Traverse(tree, 0, example);
QCHECK(leaf_idx >= 0) << "Invalid tree: " << tree.DebugString();
const auto& leaf_node = tree.nodes(leaf_idx);
QCHECK(leaf_node.has_leaf())
<< "Invalid leaf node: " << leaf_node.DebugString();
if (leaf_node.leaf().has_sparse_vector()) {
const auto& leaf = leaf_node.leaf().sparse_vector();
QCHECK_EQ(leaf.index_size(), leaf.value_size());
for (size_t logit_dim = 0; logit_dim < leaf.index_size();
++logit_dim) {
const float value = tree_weight * leaf.value(logit_dim);
output_predictions(example.example_idx, leaf.index(logit_dim)) +=
value;
}
} else {
QCHECK(leaf_node.leaf().has_vector()) << "Unknown leaf type";
const auto& leaf = leaf_node.leaf().vector();
for (size_t i = 0; i < leaf.value_size(); ++i) {
const float value = tree_weight * leaf.value(i);
output_predictions(example.example_idx, i) += value;
}
}
}
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,13 @@ namespace models {
// A multiple-additive-trees model: predictions are the weighted sum of the
// leaf values of the ensemble's member decision trees.
class MultipleAdditiveTrees {
 public:
  // Predict runs the tree ensemble on the given batch and accumulates the
  // weighted leaf values of each requested tree into `output_predictions`.
  //
  // NOTE(review): the parameter list below interleaves two versions of this
  // declaration (a diff-view extraction artifact): the removed
  // `only_finalized_trees`/`trees_to_drop`/`no_dropout_predictions`
  // parameters and the replacement `trees_to_include` parameter both
  // appear, as do duplicated trailing lines after the `);`. Reconcile
  // against the real header before compiling.
  static void Predict(
      const boosted_trees::trees::DecisionTreeEnsembleConfig& config,
      const bool only_finalized_trees, const std::vector<int32>& trees_to_drop,
      const std::vector<int32>& trees_to_include,
      const boosted_trees::utils::BatchFeatures& features,
      thread::ThreadPool* const thread_pool,
      TTypes<float>::Matrix output_predictions,
      TTypes<float>::Matrix no_dropout_predictions);
      tensorflow::thread::ThreadPool* const worker_threads,
      tensorflow::TTypes<float>::Matrix output_predictions);
};

} // namespace models
Expand Down