Skip to content

Commit

Permalink
Cache pointer to subgraph's tensors.
Browse files Browse the repository at this point in the history
Getting each tensor individually involves a virtual function call, another function call, and bounds checking; caching a pointer to the tensor array avoids that per-tensor overhead.

PiperOrigin-RevId: 472724647
  • Loading branch information
alankelly authored and tensorflower-gardener committed Sep 7, 2022
1 parent b0d9b10 commit 7528df8
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 25 deletions.
48 changes: 26 additions & 22 deletions tensorflow/lite/arena_planner.cc
Expand Up @@ -69,9 +69,10 @@ TfLiteStatus ArenaPlanner::ResetAllocations() {
}

TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
TfLiteTensor* tensors = graph_info_->tensors();
for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
if (allocs_[i].first_node > node && allocs_[i].size > 0) {
TfLiteTensor& tensor = *graph_info_->tensor(i);
TfLiteTensor& tensor = tensors[i];
if (tensor.allocation_type == kTfLiteArenaRw) {
TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
allocs_[i].reset();
Expand All @@ -85,13 +86,14 @@ TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {

TfLiteStatus ArenaPlanner::PlanAllocations() {
// Invalidate any existing data.
const size_t num_tensors = graph_info_->num_tensors();
TF_LITE_ENSURE_STATUS(ResetAllocations());
// Maybe other verb instead of 'Assigned'
alloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
dealloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
alloc_node_.assign(num_tensors, kNodeNotAssigned);
dealloc_node_.assign(num_tensors, kNodeNotAssigned);

// Keeps track of references to each tensor.
std::vector<int> refcounts(graph_info_->num_tensors(), 0);
std::vector<int> refcounts(num_tensors, 0);

auto allocate = [this](int node, int tensor) -> TfLiteStatus {
if (alloc_node_[tensor] != kNodeNotAssigned) {
Expand Down Expand Up @@ -213,8 +215,9 @@ TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
TF_LITE_ENSURE_STATUS(Commit());

TfLiteTensor* tensors = graph_info_->tensors();
for (int i = 0; i < static_cast<int>(num_tensors); ++i) {
TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
}

return kTfLiteOk;
Expand All @@ -224,8 +227,9 @@ TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
// Clear non-persistent arena's buffer.
TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
// Set data pointers for all non-persistent tensors to nullptr.
TfLiteTensor* tensors = graph_info_->tensors();
for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
TfLiteTensor& tensor = *graph_info_->tensor(i);
TfLiteTensor& tensor = tensors[i];
if (tensor.allocation_type == kTfLiteArenaRw) {
tensor.data.raw = nullptr;
}
Expand All @@ -237,10 +241,11 @@ TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
// First commit arena_ to allocate underlying buffer.
TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
// Resolve allocations for all tensors not on the persistent arena.
TfLiteTensor* tensors = graph_info_->tensors();
for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
TfLiteTensor& tensor = *graph_info_->tensor(i);
TfLiteTensor& tensor = tensors[i];
if (tensor.allocation_type == kTfLiteArenaRw) {
TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i, tensors[i]));
}
}
return kTfLiteOk;
Expand Down Expand Up @@ -270,31 +275,29 @@ TfLiteStatus ArenaPlanner::Commit() {

std::vector<int32_t> ArenaPlanner::CreateTensorAllocationVector(int first_node,
int last_node) {
auto tensor_compare = [this](int idx1, int idx2) {
const TfLiteTensor* tensors = this->graph_info_->tensors();
auto tensor_compare = [&](int idx1, int idx2) {
// Tensors that have lifespan through the whole model inference time are
// allocated at the beginning of memory slice. Their respective order
// doesn't matter in fact, so here they are sorted by index.
if (this->alloc_node_[idx1] == 0 &&
this->dealloc_node_[idx1] == kNodeNotAssigned) {
if (this->alloc_node_[idx2] == 0 &&
this->dealloc_node_[idx2] == kNodeNotAssigned) {
if (alloc_node_[idx1] == 0 && dealloc_node_[idx1] == kNodeNotAssigned) {
if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
return idx1 < idx2;
}
return true;
}
if (this->alloc_node_[idx2] == 0 &&
this->dealloc_node_[idx2] == kNodeNotAssigned) {
if (alloc_node_[idx2] == 0 && dealloc_node_[idx2] == kNodeNotAssigned) {
return false;
}

// All other tensors are sorted in non-increasing order of their size.
auto size1 = this->graph_info_->tensor(idx1)->bytes;
auto size2 = this->graph_info_->tensor(idx2)->bytes;
auto size1 = tensors[idx1].bytes;
auto size2 = tensors[idx2].bytes;
if (size1 != size2) {
return size1 > size2;
}
// Tensors with equal size are sorted in order of their allocation time.
return this->alloc_node_[idx1] < this->alloc_node_[idx2];
return alloc_node_[idx1] < alloc_node_[idx2];
};

std::vector<int32_t> tensor_order;
Expand All @@ -317,8 +320,9 @@ TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
CreateTensorAllocationVector(first_node, last_node);

// Deallocate if the tensor was already allocated.
TfLiteTensor* tensors = graph_info_->tensors();
for (const auto& tensor_index : tensor_order) {
TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
TfLiteTensor& tensor = tensors[tensor_index];
if (tensor.allocation_type == kTfLiteArenaRw &&
allocs_[tensor_index].size != 0) {
TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
Expand All @@ -327,7 +331,7 @@ TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {

// Vector of ids of already allocated tensors, ordered by offset.
for (const auto& tensor_index : tensor_order) {
TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
TfLiteTensor& tensor = tensors[tensor_index];
if (tensor.allocation_type == kTfLiteArenaRw) {
TF_LITE_ENSURE_STATUS(
arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
Expand All @@ -347,8 +351,8 @@ TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index,
TfLiteTensor& tensor) {
if (tensor.allocation_type == kTfLiteArenaRw) {
// Skip resolution if the size of the tensor is zero, leaving it as a
// nullptr.
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/lite/arena_planner.h
Expand Up @@ -91,7 +91,7 @@ class ArenaPlanner : public MemoryPlanner {

// Assign absolute memory location to a tensor, based on its relative
// position inside the corresponding arena buffer.
TfLiteStatus ResolveTensorAllocation(int tensor_index);
TfLiteStatus ResolveTensorAllocation(int tensor_index, TfLiteTensor& tensor);

// Register an allocation for all internal (temporary) tensors of
// 'node_index'.
Expand Down
1 change: 1 addition & 0 deletions tensorflow/lite/arena_planner_test.cc
Expand Up @@ -154,6 +154,7 @@ class TestGraphInfo : public GraphInfo {
explicit TestGraphInfo(TestGraph* graph) : graph_(graph) {}

// Total tensor count, taken from the wrapped TestGraph's tensor vector.
size_t num_tensors() const override {
  const auto* tensor_vec = graph_->tensors();
  return tensor_vec->size();
}
// Pointer to the first tensor; callers index into this contiguous array.
TfLiteTensor* tensors() override {
  auto* tensor_vec = graph_->tensors();
  return tensor_vec->data();
}
// Bounds-checked lookup of a single tensor (vector::at throws when
// index is out of range).
TfLiteTensor* tensor(size_t index) override {
  auto& entry = graph_->tensors()->at(index);
  return &entry;
}
Expand Down
1 change: 1 addition & 0 deletions tensorflow/lite/core/subgraph.cc
Expand Up @@ -200,6 +200,7 @@ class InterpreterInfo : public GraphInfo {
explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {}

// Tensor count is delegated to the wrapped subgraph.
size_t num_tensors() const override {
  const size_t count = subgraph_->tensors_size();
  return count;
}
// Whole tensor array of the subgraph, fetched once so callers can iterate
// without a virtual call per tensor.
TfLiteTensor* tensors() override {
  TfLiteTensor* all = subgraph_->tensors();
  return all;
}
// Single-tensor access via the subgraph's checked accessor.
TfLiteTensor* tensor(size_t index) override {
  TfLiteTensor* result = subgraph_->tensor(index);
  return result;
}
Expand Down
3 changes: 3 additions & 0 deletions tensorflow/lite/core/subgraph.h
Expand Up @@ -144,6 +144,9 @@ class Subgraph {
bool is_variable = false, const size_t ndims_signature = 0,
const int* dims_signature = nullptr);

// Get all tensors in the subgraph as one contiguous array owned by context_.
TfLiteTensor* tensors() {
  return context_.tensors;
}

// Get a mutable tensor data structure.
TfLiteTensor* tensor(int tensor_index) {
if (tensor_index < 0 ||
Expand Down
7 changes: 5 additions & 2 deletions tensorflow/lite/graph_info.h
Expand Up @@ -31,13 +31,16 @@ class GraphInfo {
public:
virtual ~GraphInfo() {}

// Total number of tensors in the graph.
// Total number of tensors in the graph. This should be cached when possible.
virtual size_t num_tensors() const = 0;

// Returns a tensor given its index which is expected to be between 0 and
// num_tensors().
// num_tensors(). Use tensors() below for iteration as it is much faster.
virtual TfLiteTensor* tensor(size_t index) = 0;

// Returns all tensors in the graph.
virtual TfLiteTensor* tensors() = 0;

// Number of nodes in the current execution plan.
virtual size_t num_execution_nodes() const = 0;

Expand Down
1 change: 1 addition & 0 deletions tensorflow/lite/graph_info_test.cc
Expand Up @@ -63,6 +63,7 @@ class SimpleTestGraph : public GraphInfo {
}
// Number of tensors held directly by this test graph.
size_t num_tensors() const override {
  const auto count = tensors_.size();
  return count;
}
// Address of the tensor stored at `index` (no bounds checking).
TfLiteTensor* tensor(size_t index) override {
  auto& entry = tensors_[index];
  return &entry;
}
// Backing array for all tensors of this test graph.
TfLiteTensor* tensors() override {
  return tensors_.data();
}
// Indices of the graph's input tensors.
const std::vector<int>& inputs() const override {
  return inputs_;
}
// Indices of the graph's output tensors.
const std::vector<int>& outputs() const override {
  return outputs_;
}
// Indices of the graph's variable tensors.
const std::vector<int>& variables() const override {
  return variables_;
}
Expand Down
1 change: 1 addition & 0 deletions tensorflow/lite/simple_planner_test.cc
Expand Up @@ -137,6 +137,7 @@ class TestGraphInfo : public GraphInfo {
// Bounds-checked single-tensor lookup (vector::at throws when out of range).
TfLiteTensor* tensor(size_t index) override {
  auto& entry = graph_->tensors()->at(index);
  return &entry;
}
// Contiguous tensor storage of the wrapped test graph.
TfLiteTensor* tensors() override {
  auto* tensor_vec = graph_->tensors();
  return tensor_vec->data();
}
// Every node of the test graph is part of the execution plan.
size_t num_execution_nodes() const override {
  return graph_->nodes().size();
}
// Identical to num_execution_nodes() in this test double: there are no
// nodes outside the execution plan.
size_t num_total_nodes() const override {
  return graph_->nodes().size();
}
const TfLiteNode& node(size_t index) const override {
Expand Down

0 comments on commit 7528df8

Please sign in to comment.