From 230f11e983c80502dbc794bcd9803d1bacabdd56 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Tue, 3 Dec 2019 19:07:18 -0800
Subject: [PATCH 01/18] Use backend to create tensor

---
 ngraph_bridge/ngraph_encapsulate_impl.cc | 9 +++++----
 ngraph_bridge/ngraph_encapsulate_op.cc   | 8 ++++++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index ab9a779c1..6d999422d 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -432,17 +432,18 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   // values. ie, it will not reuse the same space if its rewritten it
   bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr;
   bool no_ng_tensor_found = last_ng_tensor == nullptr;
-  bool is_cpu = m_op_backend_name == "CPU";
+  bool is_cpu_or_nnpi =
+      (m_op_backend_name == "CPU") || (m_op_backend_name == "NNPI");
 
   // We need to check last_ng_tensor != nullptr, since there are cases where
   // at the first call to the ng_exec, both current_dst_ptr (when the
   // output is a 0-sized tensor) and last_dst_ptr (uninitialized at the
   // first call) are nullptr
   // A new tensor needs to be created for sure if no_ng_tensor_found
-  // Additionally for CPU, it needs to be created if tf_tensor_has_changed,
+  // Additionally for CPU/NNPI, it needs to be created if tf_tensor_has_changed,
   // for others, we do not create
   bool need_new_tensor_creation;
-  if (is_cpu) {
+  if (is_cpu_or_nnpi) {
     need_new_tensor_creation = no_ng_tensor_found || tf_tensor_has_changed;
   } else {
     need_new_tensor_creation = no_ng_tensor_found;
@@ -464,7 +465,7 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
     current_ng_tensor = tensor_from_pipeline;
   } else {
     if (need_new_tensor_creation) {
-      if (is_cpu) {
+      if (is_cpu_or_nnpi) {
         current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape,
                                                       current_tf_ptr);
       } else {
diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 1bd7a7d7e..a5f57f70a 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -198,9 +198,13 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx)
   BackendManager::SetConfig(ng_encap_impl.GetOpBackend(),
                             additional_attribute_map);
 
-  ng_encap_impl.SetExecCanCreateTensor(
+  // For NNPI (even though executable can create tensor) use backend to create
+  // tensor
+  bool executable_create_tensor =
+      (backend_name != "NNPI") &&
       BackendManager::GetBackend(ng_encap_impl.GetOpBackend())
-          ->executable_can_create_tensors());
+          ->executable_can_create_tensors();
+  ng_encap_impl.SetExecCanCreateTensor(executable_create_tensor);
   NGRAPH_VLOG(5) << "Executable can "
                  << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not")
                  << " create tensors";

From 84693e4332f27b486b710ccd1d41841646bf0d8d Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Tue, 3 Dec 2019 19:27:17 -0800
Subject: [PATCH 02/18] Minor log

---
 ngraph_bridge/ngraph_encapsulate_impl.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index 6d999422d..50aa71649 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -466,6 +466,7 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   } else {
     if (need_new_tensor_creation) {
       if (is_cpu_or_nnpi) {
+        NGRAPH_VLOG(0) << "Backend creating tensor with tf pointer";
         current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape,
                                                       current_tf_ptr);
       } else {

From de6b5bd29f2c1fd69270bd3a41ee3adb14b105b3 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Tue, 3 Dec 2019 19:47:46 -0800
Subject: [PATCH 03/18] Fix backend detection

---
 ngraph_bridge/ngraph_encapsulate_impl.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index 50aa71649..c0b0bc4f0 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -432,8 +432,9 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   // values. ie, it will not reuse the same space if its rewritten it
   bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr;
   bool no_ng_tensor_found = last_ng_tensor == nullptr;
-  bool is_cpu_or_nnpi =
-      (m_op_backend_name == "CPU") || (m_op_backend_name == "NNPI");
+  // m_op_backend_name might be BE:0, check if it starts with BE
+  bool is_cpu_or_nnpi = (m_op_backend_name.find("CPU") == 0) ||
+                        (m_op_backend_name.find("NNPI") == 0);
 
   // We need to check last_ng_tensor != nullptr, since there are cases where
   // at the first call to the ng_exec, both current_dst_ptr (when the
@@ -466,7 +467,6 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   } else {
     if (need_new_tensor_creation) {
       if (is_cpu_or_nnpi) {
-        NGRAPH_VLOG(0) << "Backend creating tensor with tf pointer";
         current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape,
                                                       current_tf_ptr);
       } else {

From 91cfc0aa2be809802c6a769f8311f4be6eca7fd3 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Wed, 4 Dec 2019 10:15:42 -0800
Subject: [PATCH 04/18] Make sure executable_can_create_tensors executes and is
 not optimized away

---
 ngraph_bridge/ngraph_encapsulate_op.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index a5f57f70a..e5ab0ead6 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -200,10 +200,12 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx)
 
   // For NNPI (even though executable can create tensor) use backend to create
   // tensor
+  // Keep the executable_can_create_tensors check before the
+  // backend_name!="NNPI"
   bool executable_create_tensor =
-      (backend_name != "NNPI") &&
       BackendManager::GetBackend(ng_encap_impl.GetOpBackend())
-          ->executable_can_create_tensors();
+          ->executable_can_create_tensors() &&
+      (backend_name != "NNPI");
   ng_encap_impl.SetExecCanCreateTensor(executable_create_tensor);
   NGRAPH_VLOG(5) << "Executable can "
                  << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not")

From 01aeb54508cd86ad65f05013e459a56ed10d04c4 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Thu, 5 Dec 2019 17:52:14 -0800
Subject: [PATCH 05/18] Prints and a test

---
 ngraph_bridge/ngraph_encapsulate_impl.cc |  5 +++-
 ngraph_bridge/ngraph_encapsulate_op.cc   |  2 ++
 test/tf_exec.cpp                         | 35 ++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index c0b0bc4f0..b1d0f0871 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -322,6 +322,7 @@ Status NGraphEncapsulateImpl::AllocateNGInputTensors(
         current_src_ptr, last_src_ptr, last_ng_tensor, false, ng_exec,
         op_backend, ng_element_type, ng_shape,
         m_executable_can_create_tensor ? inp_group_from_pipeline[i] : nullptr);
+    cout << "Using input tensor ptr: " << current_ng_tensor << "\n";
     bool is_cpu = m_op_backend_name == "CPU";
 
     if (!is_cpu && current_ng_tensor->get_stale()) {
@@ -410,7 +411,7 @@ Status NGraphEncapsulateImpl::AllocateNGOutputTensors(
         current_dst_ptr, last_dst_ptr, last_ng_tensor, true, ng_exec,
         op_backend, ng_element_type, ng_shape,
         m_executable_can_create_tensor ? out_group_from_pipeline[i] : nullptr);
-
+    cout << "Using output tensor ptr: " << current_ng_tensor << "\n";
     current_ng_tensor->set_stale(true);
     output_caches[i] = std::make_pair(current_dst_ptr, current_ng_tensor);
     ng_outputs.push_back(current_ng_tensor);
@@ -431,6 +432,7 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   // NOTE: we assume that TF's pointers WILL change if it actually changes
   // values. ie, it will not reuse the same space if its rewritten it
   bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr;
+  cout << "tf_tensor_has_changed: " << tf_tensor_has_changed << "\n";
   bool no_ng_tensor_found = last_ng_tensor == nullptr;
   // m_op_backend_name might be BE:0, check if it starts with BE
   bool is_cpu_or_nnpi = (m_op_backend_name.find("CPU") == 0) ||
@@ -467,6 +469,7 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   } else {
     if (need_new_tensor_creation) {
       if (is_cpu_or_nnpi) {
+        cout << "Backend creating tensor with pointer: " << current_tf_ptr << "\n";
         current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape,
                                                       current_tf_ptr);
       } else {
diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index e5ab0ead6..7b1976fa3 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -726,6 +726,8 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
   ngraph::Event::write_trace(event_copy_output);
   ngraph::Event::write_trace(event);
 
+  cout << "\n";
+
 }  // end compute
 
 int NGraphEncapsulateImpl::s_instance_count = 0;
diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp
index 9cea111de..043bca0a8 100644
--- a/test/tf_exec.cpp
+++ b/test/tf_exec.cpp
@@ -138,6 +138,41 @@ TEST(tf_exec, SingleGraphOn2Threads) {
   }
 }
 
+
+TEST(tf_exec, model) {
+  string graph_name = "test_axpy.pbtxt";
+
+    unique_ptr<Session> session;
+    ASSERT_OK(CreateSession(graph_name, "CPU", session));
+
+    string inp_tensor_name_0{"x"};
+    string inp_tensor_name_1{"y"};
+    string out_tensor_name{"add"};
+    std::vector<Tensor> out_tensor_vals;
+
+    Tensor inp_tensor_val(tensorflow::DT_FLOAT,
+                            tensorflow::TensorShape({2, 3}));
+    Tensor out_tensor_expected_val(tensorflow::DT_FLOAT,
+                                      tensorflow::TensorShape({2, 3}));
+    for (int i = 0; i < 10; i++) {
+      
+      vector<float> in_vals(6, float(i));
+      AssignInputValues<float>(inp_tensor_val, in_vals);
+      
+      vector<float> out_vals(6, 6.0 * float(i));
+      AssignInputValues<float>(out_tensor_expected_val, out_vals);
+
+      std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
+          {inp_tensor_name_0, inp_tensor_val},
+          {inp_tensor_name_1, inp_tensor_val}};
+
+      ASSERT_OK(
+          session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals));
+      Compare(out_tensor_vals, {out_tensor_expected_val});
+    }
+}
+
+
 TEST(tf_exec, hello_world) {
   Scope root = Scope::NewRootScope();
 

From acfd1654e6ad84b6a16934481b5ca06435e8b88d Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Fri, 6 Dec 2019 11:20:37 -0800
Subject: [PATCH 06/18] Some refactoring, Style and add back NG_TRACE for
 output

---
 ngraph_bridge/ngraph_encapsulate_impl.cc |  4 +-
 ngraph_bridge/ngraph_encapsulate_op.cc   | 75 ++++++++++--------------
 test/test_utilities.h                    |  1 +
 test/tf_exec.cpp                         | 55 ++++++++---------
 4 files changed, 58 insertions(+), 77 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index e6049dad3..d099600d7 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -454,7 +454,8 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   } else {
     if (need_new_tensor_creation) {
       if (is_cpu_or_nnpi) {
-        cout << "Backend creating tensor with pointer: " << current_tf_ptr << "\n";
+        cout << "Backend creating tensor with pointer: " << current_tf_ptr
+             << "\n";
         current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape,
                                                       current_tf_ptr);
       } else {
@@ -581,7 +582,6 @@ void NGraphEncapsulateImpl::DumpNgFunction(
   StringToFile(file_name, m_serialized_ng_function_map[ng_exec]);
 }
 
-
 Status NGraphEncapsulateImpl::GetPersistentTFOutputTensor(
     std::shared_ptr<ngraph::runtime::Executable> exec,
     std::vector<tensorflow::PersistentTensor>& tf_output_tensors) {
diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 954f7295f..95eba94dd 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -209,7 +209,6 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx)
                  << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not")
                  << " create tensors";
 
-
   const char* not_persistent_flag = std::getenv("NGRAPH_TF_NOT_PERSISTENT");
   m_use_persistent = (not_persistent_flag == nullptr);
 
@@ -357,51 +356,18 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
   vector<shared_ptr<ng::runtime::Tensor>> ng_outputs;
   std::vector<Tensor*> tf_output_tensors;
   std::vector<tensorflow::PersistentTensor> cached_persistent_output_tensors;
+  bool present_in_cache = false;
 
-  if (m_use_persistent) {
-    bool present_in_cache = false;
-    present_in_cache = ng_encap_impl.PersistentOutputsExist(ng_exec);
-    if (present_in_cache) {
-      ng_encap_impl.GetPersistentTFOutputTensor(
-          ng_exec, cached_persistent_output_tensors);
-    }
-
-    for (auto i = 0; i < ng_exec->get_results().size(); i++) {
-      auto ng_element = ng_exec->get_results()[i];
-      auto ng_shape = ng_element->get_shape();
-      auto ng_element_type = ng_element->get_element_type();
-      // Create the TF output tensor
-      vector<int64> dims;
-      for (auto dim : ng_shape) {
-        dims.push_back(dim);
-      }
-      TensorShape tf_shape(dims);
-      Tensor* output_tensor = nullptr;
+  {
+    NG_TRACE("NGTF_Output_Alloc", "");
+    if (m_use_persistent) {
+      present_in_cache = ng_encap_impl.PersistentOutputsExist(ng_exec);
       if (present_in_cache) {
-        output_tensor = cached_persistent_output_tensors[i].AccessTensor(ctx);
-      } else {
-        // create a persistent tensor
-          PersistentTensor out_persistent;
-          OP_REQUIRES_OK(ctx, ctx->allocate_persistent(
-                                  ctx->expected_output_dtype(i), tf_shape,
-                                  &out_persistent, &output_tensor));
-          cout << "ctx->expected_output_dtype(i): "
-              << ctx->expected_output_dtype(i) << "\n";
-          cout << "Creation time: " << output_tensor->dtype() << "\n";
-          cached_persistent_output_tensors.push_back(out_persistent);
+        ng_encap_impl.GetPersistentTFOutputTensor(
+            ng_exec, cached_persistent_output_tensors);
       }
-      tf_output_tensors.push_back(output_tensor);
     }
-    if (!present_in_cache){
-      OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors(
-                                ng_exec, cached_persistent_output_tensors));
-    }
-    OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors(
-                          tf_output_tensors, ng_exec, out_group_from_pipeline,
-                          op_backend, ng_outputs));
-  } else
-  {
-    NG_TRACE("NGTF_Output_Alloc", "");
+
     for (auto i = 0; i < ng_exec->get_results().size(); i++) {
       auto ng_element = ng_exec->get_results()[i];
       auto ng_shape = ng_element->get_shape();
@@ -414,12 +380,13 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
       }
       TensorShape tf_shape(dims);
       Tensor* output_tensor = nullptr;
-      OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor));
-      tf_output_tensors.push_back(output_tensor);
 
       // Make sure the nGraph-inferred element type agrees with what TensorFlow
       // expected.
       ng::element::Type expected_elem_type;
+      // TODO, we only need to do these checks once when the exec was
+      // created/compiled, not again and again
+
       OP_REQUIRES_OK(
           ctx, TFDataTypeToNGraphElementType(ctx->expected_output_dtype(i),
                                              &expected_elem_type));
@@ -427,8 +394,26 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
           ctx, ng_element_type == expected_elem_type,
           errors::Internal("Element type inferred by nGraph does not match "
                            "the element type expected by TensorFlow"));
-    }
 
+      if (m_use_persistent) {
+        if (present_in_cache) {
+          output_tensor = cached_persistent_output_tensors[i].AccessTensor(ctx);
+        } else {
+          // create a persistent tensor
+          PersistentTensor out_persistent;
+          OP_REQUIRES_OK(ctx, ctx->allocate_persistent(
+                                  ctx->expected_output_dtype(i), tf_shape,
+                                  &out_persistent, &output_tensor));
+          cout << "ctx->expected_output_dtype(i): "
+               << ctx->expected_output_dtype(i) << "\n";
+          cout << "Creation time: " << output_tensor->dtype() << "\n";
+          cached_persistent_output_tensors.push_back(out_persistent);
+        }
+      } else {
+        OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor));
+      }
+      tf_output_tensors.push_back(output_tensor);
+    }
     OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors(
                             tf_output_tensors, ng_exec, out_group_from_pipeline,
                             op_backend, ng_outputs));
diff --git a/test/test_utilities.h b/test/test_utilities.h
index 349ddf784..15a269ba9 100644
--- a/test/test_utilities.h
+++ b/test/test_utilities.h
@@ -147,6 +147,7 @@ void Compare(const Tensor& T1, const Tensor& T2,
   for (int k = 0; k < T_size; k++) {
     auto a = T1_data[k];
     auto b = T2_data[k];
+    cout << "a: " << a << ", b: " << b << "\n";
     bool rt = Compare<T>(a, b, rtol, atol);
     EXPECT_TRUE(rt) << " TF output " << a << endl << " NG output " << b;
   }
diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp
index 043bca0a8..6609302da 100644
--- a/test/tf_exec.cpp
+++ b/test/tf_exec.cpp
@@ -138,40 +138,35 @@ TEST(tf_exec, SingleGraphOn2Threads) {
   }
 }
 
-
 TEST(tf_exec, model) {
   string graph_name = "test_axpy.pbtxt";
 
-    unique_ptr<Session> session;
-    ASSERT_OK(CreateSession(graph_name, "CPU", session));
-
-    string inp_tensor_name_0{"x"};
-    string inp_tensor_name_1{"y"};
-    string out_tensor_name{"add"};
-    std::vector<Tensor> out_tensor_vals;
-
-    Tensor inp_tensor_val(tensorflow::DT_FLOAT,
-                            tensorflow::TensorShape({2, 3}));
-    Tensor out_tensor_expected_val(tensorflow::DT_FLOAT,
-                                      tensorflow::TensorShape({2, 3}));
-    for (int i = 0; i < 10; i++) {
-      
-      vector<float> in_vals(6, float(i));
-      AssignInputValues<float>(inp_tensor_val, in_vals);
-      
-      vector<float> out_vals(6, 6.0 * float(i));
-      AssignInputValues<float>(out_tensor_expected_val, out_vals);
-
-      std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
-          {inp_tensor_name_0, inp_tensor_val},
-          {inp_tensor_name_1, inp_tensor_val}};
-
-      ASSERT_OK(
-          session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals));
-      Compare(out_tensor_vals, {out_tensor_expected_val});
-    }
-}
+  unique_ptr<Session> session;
+  ASSERT_OK(CreateSession(graph_name, "CPU", session));
+
+  string inp_tensor_name_0{"x"};
+  string inp_tensor_name_1{"y"};
+  string out_tensor_name{"add"};
+  std::vector<Tensor> out_tensor_vals;
 
+  Tensor inp_tensor_val(tensorflow::DT_FLOAT, tensorflow::TensorShape({2, 3}));
+  Tensor out_tensor_expected_val(tensorflow::DT_FLOAT,
+                                 tensorflow::TensorShape({2, 3}));
+  for (int i = 0; i < 10; i++) {
+    vector<float> in_vals(6, float(i));
+    AssignInputValues<float>(inp_tensor_val, in_vals);
+
+    vector<float> out_vals(6, 6.0 * float(i));
+    AssignInputValues<float>(out_tensor_expected_val, out_vals);
+
+    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
+        {inp_tensor_name_0, inp_tensor_val},
+        {inp_tensor_name_1, inp_tensor_val}};
+
+    ASSERT_OK(session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals));
+    Compare(out_tensor_vals, {out_tensor_expected_val});
+  }
+}
 
 TEST(tf_exec, hello_world) {
   Scope root = Scope::NewRootScope();

From bd4e4f18949330ccaf677af361ed7b86e7e7d0b4 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Fri, 6 Dec 2019 11:32:23 -0800
Subject: [PATCH 07/18] Remove print from compare

---
 test/test_utilities.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_utilities.h b/test/test_utilities.h
index 15a269ba9..349ddf784 100644
--- a/test/test_utilities.h
+++ b/test/test_utilities.h
@@ -147,7 +147,6 @@ void Compare(const Tensor& T1, const Tensor& T2,
   for (int k = 0; k < T_size; k++) {
     auto a = T1_data[k];
     auto b = T2_data[k];
-    cout << "a: " << a << ", b: " << b << "\n";
     bool rt = Compare<T>(a, b, rtol, atol);
     EXPECT_TRUE(rt) << " TF output " << a << endl << " NG output " << b;
   }

From 6d8651ded0744cdfc8eae9602fa7974957eebefa Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Fri, 6 Dec 2019 11:52:23 -0800
Subject: [PATCH 08/18] Fix

---
 ngraph_bridge/ngraph_encapsulate_op.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 95eba94dd..41c268265 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -414,6 +414,10 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
       }
       tf_output_tensors.push_back(output_tensor);
     }
+    if (!present_in_cache) {
+      OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors(
+                              ng_exec, cached_persistent_output_tensors));
+    }
     OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors(
                             tf_output_tensors, ng_exec, out_group_from_pipeline,
                             op_backend, ng_outputs));

From e1f5ca64455a682cf304a69ceb4ebb7854ae61e3 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Mon, 9 Dec 2019 00:00:18 -0800
Subject: [PATCH 09/18] Setting output tensor slightly differently

---
 ngraph_bridge/ngraph_encapsulate_op.cc | 17 +++++-----
 test/tf_exec.cpp                       | 43 ++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 41c268265..19e8a87d1 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -363,8 +363,8 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
     if (m_use_persistent) {
       present_in_cache = ng_encap_impl.PersistentOutputsExist(ng_exec);
       if (present_in_cache) {
-        ng_encap_impl.GetPersistentTFOutputTensor(
-            ng_exec, cached_persistent_output_tensors);
+        OP_REQUIRES_OK(ctx, ng_encap_impl.GetPersistentTFOutputTensor(
+                                ng_exec, cached_persistent_output_tensors));
       }
     }
 
@@ -657,14 +657,17 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
 
   if (m_use_persistent) {
     for (int out_idx = 0; out_idx < ng_exec->get_results().size(); out_idx++) {
+      OP_REQUIRES_OK(ctx, ng_encap_impl.GetPersistentTFOutputTensor(
+                              ng_exec, cached_persistent_output_tensors));
+      auto out_tensor =
+          cached_persistent_output_tensors[out_idx].AccessTensor(ctx);
+
       // TODO uncomment
       cout << "----\nSetting output " << out_idx << " --- "
-           << tf_output_tensors[out_idx]->dtype() << " "
-           << tf_output_tensors[out_idx]->NumElements() << "----\n";
+           << out_tensor->dtype() << " " << out_tensor->NumElements()
+           << "----\n";
       // ctx->set_output(out_idx, *tf_output_tensors[out_idx]);
-      ctx->set_output(
-          out_idx,
-          *(cached_persistent_output_tensors[out_idx].AccessTensor(ctx)));
+      ctx->set_output(out_idx, *out_tensor);
     }
   }
 
diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp
index 6609302da..fb8559e84 100644
--- a/test/tf_exec.cpp
+++ b/test/tf_exec.cpp
@@ -238,6 +238,49 @@ TEST(tf_exec, axpy) {
   }
 }
 
+TEST(tf_exec, axpyIntermediate) {
+  GraphDef gdef;
+  auto status = ReadTextProto(Env::Default(), "test_axpy.pbtxt", &gdef);
+  ASSERT_TRUE(status == Status::OK()) << "Can't read protobuf graph";
+
+  SessionOptions options;
+  ConfigProto& config = options.config;
+  config.set_allow_soft_placement(true);
+  std::unique_ptr<Session> session(NewSession(options));
+
+  ASSERT_OK(session->Create(gdef));
+
+  // Create the inputs for this graph
+  Tensor x(DT_FLOAT, TensorShape({2, 3}));
+  auto x_flat = x.flat<float>();
+  for (int i = 0; i < x_flat.size(); i++) {
+    x_flat.data()[i] = 1.0;
+  }
+
+  Tensor y(DT_FLOAT, TensorShape({2, 3}));
+  auto y_flat = y.flat<float>();
+  for (int i = 0; i < y_flat.size(); i++) {
+    y_flat.data()[i] = 1.0;
+  }
+
+  std::vector<Tensor> outputs;
+
+  ASSERT_OK(session->Run({{"x", x}, {"y", y}}, {"mul"}, {}, &outputs));
+
+  ASSERT_EQ(outputs.size(), 1);
+  auto mat1 = outputs[0].matrix<float>();
+  EXPECT_FLOAT_EQ(5.0, mat1(0, 0));
+  EXPECT_FLOAT_EQ(5.0, mat1(1, 0));
+
+  for (auto output : outputs) {
+    auto output_flat = output.flat<float>();
+    for (int i = 0; i < x_flat.size(); i++) {
+      cout << output_flat.data()[i] << " ";
+    }
+    cout << endl;
+  }
+}
+
 TEST(tf_exec, DISABLED_BatchMatMul_0D) {
   Scope root = Scope::NewRootScope();
   auto dev_scope = root.WithDevice("/device:NGRAPH:0");

From b25e4824da533e514c33d999ad124fd3d7a9b8af Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Mon, 9 Dec 2019 00:47:33 -0800
Subject: [PATCH 10/18] Fixing allocate_persistent call for cases with >1
 output

---
 ngraph_bridge/ngraph_encapsulate_op.cc | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 19e8a87d1..4d27d1b2d 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -355,7 +355,8 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
   // Allocate tensors for the output results.
   vector<shared_ptr<ng::runtime::Tensor>> ng_outputs;
   std::vector<Tensor*> tf_output_tensors;
-  std::vector<tensorflow::PersistentTensor> cached_persistent_output_tensors;
+  std::vector<tensorflow::PersistentTensor> cached_persistent_output_tensors(
+      ng_exec->get_results().size());
   bool present_in_cache = false;
 
   {
@@ -400,14 +401,10 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
           output_tensor = cached_persistent_output_tensors[i].AccessTensor(ctx);
         } else {
           // create a persistent tensor
-          PersistentTensor out_persistent;
-          OP_REQUIRES_OK(ctx, ctx->allocate_persistent(
-                                  ctx->expected_output_dtype(i), tf_shape,
-                                  &out_persistent, &output_tensor));
-          cout << "ctx->expected_output_dtype(i): "
-               << ctx->expected_output_dtype(i) << "\n";
-          cout << "Creation time: " << output_tensor->dtype() << "\n";
-          cached_persistent_output_tensors.push_back(out_persistent);
+          OP_REQUIRES_OK(
+              ctx, ctx->allocate_persistent(
+                       ctx->expected_output_dtype(i), tf_shape,
+                       &cached_persistent_output_tensors[i], &output_tensor));
         }
       } else {
         OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor));

From cf5b58c6f0ddf45cc9a0d6f711af391bf47195ad Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Mon, 9 Dec 2019 00:48:28 -0800
Subject: [PATCH 11/18] Cleanup

---
 ngraph_bridge/ngraph_encapsulate_op.cc | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 4d27d1b2d..444a8c29b 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -658,17 +658,9 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
                               ng_exec, cached_persistent_output_tensors));
       auto out_tensor =
           cached_persistent_output_tensors[out_idx].AccessTensor(ctx);
-
-      // TODO uncomment
-      cout << "----\nSetting output " << out_idx << " --- "
-           << out_tensor->dtype() << " " << out_tensor->NumElements()
-           << "----\n";
-      // ctx->set_output(out_idx, *tf_output_tensors[out_idx]);
       ctx->set_output(out_idx, *out_tensor);
     }
   }
-
-  cout << "\n";
 }  // end compute
 
 int NGraphEncapsulateImpl::s_instance_count = 0;

From 34049e1b07de366afcce7f7d22cbfa827560179f Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Mon, 9 Dec 2019 00:50:44 -0800
Subject: [PATCH 12/18] Cleanup

---
 ngraph_bridge/ngraph_encapsulate_impl.cc | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index d099600d7..3e0ccad81 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -320,7 +320,6 @@ Status NGraphEncapsulateImpl::AllocateNGInputTensors(
         current_src_ptr, last_src_ptr, last_ng_tensor, false, ng_exec,
         op_backend, ng_element_type, ng_shape,
         m_executable_can_create_tensor ? inp_group_from_pipeline[i] : nullptr);
-    cout << "Using input tensor ptr: " << current_ng_tensor << "\n";
     bool is_cpu = m_op_backend_name == "CPU";
 
     if (!is_cpu && current_ng_tensor->get_stale()) {
@@ -396,7 +395,6 @@ Status NGraphEncapsulateImpl::AllocateNGOutputTensors(
         current_dst_ptr, last_dst_ptr, last_ng_tensor, true, ng_exec,
         op_backend, ng_element_type, ng_shape,
         m_executable_can_create_tensor ? out_group_from_pipeline[i] : nullptr);
-    cout << "Using output tensor ptr: " << current_ng_tensor << "\n";
     current_ng_tensor->set_stale(true);
     output_caches[i] = std::make_pair(current_dst_ptr, current_ng_tensor);
     ng_outputs.push_back(current_ng_tensor);
@@ -417,7 +415,7 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   // NOTE: we assume that TF's pointers WILL change if it actually changes
   // values. ie, it will not reuse the same space if its rewritten it
   bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr;
-  cout << "tf_tensor_has_changed: " << tf_tensor_has_changed << "\n";
+  NGRAPH_VLOG(5) << "tf_tensor_has_changed: " << tf_tensor_has_changed;
   bool no_ng_tensor_found = last_ng_tensor == nullptr;
   // m_op_backend_name might be BE:0, check if it starts with BE
   bool is_cpu_or_nnpi = (m_op_backend_name.find("CPU") == 0) ||
@@ -454,8 +452,8 @@ std::shared_ptr<ng::runtime::Tensor> NGraphEncapsulateImpl::GetCurrentNgTensor(
   } else {
     if (need_new_tensor_creation) {
       if (is_cpu_or_nnpi) {
-        cout << "Backend creating tensor with pointer: " << current_tf_ptr
-             << "\n";
+        NGRAPH_VLOG(5) << "Backend creating tensor with pointer: "
+                       << current_tf_ptr;
         current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape,
                                                       current_tf_ptr);
       } else {
@@ -609,7 +607,6 @@ Status NGraphEncapsulateImpl::RegisterOutputPersistentTensors(
         "Found an entry already exists in the cache for persistent tensors");
   }
   m_out_persistents.emplace(exec, persistent_tensors);
-  cout << "m_out_persistents: " << m_out_persistents.size() << "\n";
   return Status::OK();
 }
 

From 70dc3af5af91843eb8dba60f2c0eefca76d4bb80 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Mon, 9 Dec 2019 01:28:24 -0800
Subject: [PATCH 13/18] Fix for multithreaded case

---
 ngraph_bridge/ngraph_encapsulate_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 444a8c29b..589581e9b 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -411,7 +411,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
       }
       tf_output_tensors.push_back(output_tensor);
     }
-    if (!present_in_cache) {
+    if (m_use_persistent && !present_in_cache) {
       OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors(
                               ng_exec, cached_persistent_output_tensors));
     }

From e6b5431d285f9542aff6c8517a245a1ded496865 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Mon, 9 Dec 2019 02:21:54 -0800
Subject: [PATCH 14/18] Do not use persistenttensor for multithreaded case

---
 test/tf_exec.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp
index fb8559e84..8e3dd5162 100644
--- a/test/tf_exec.cpp
+++ b/test/tf_exec.cpp
@@ -95,7 +95,16 @@ Status CreateSession(const string& graph_filename, const string& backend_name,
   return load_graph_status;
 }
 
+// This test might fail when running with persistent output tensors
+// Maybe because once we have computed outputs out to persistent tensors,
+// the next thread comes in before Compare runs, and changes the values?
+// For example, if we add a 1sec sleep right after entering Compute(), this test
+// would pass (since we now allow enough time for compare to run before the next
+// thread comes in and modifies the persistent tensor values)
+// TODO: see how persistenttensors might fit in with this kind of multithreading
+// (symmetric parallel)
 TEST(tf_exec, SingleGraphOn2Threads) {
+  SetEnvVariable("NGRAPH_TF_NOT_PERSISTENT", "1");
   string graph_name = "test_axpy.pbtxt";
   vector<string> backends{"CPU", "INTERPRETER"};
   for (auto be : backends) {
@@ -136,6 +145,7 @@ TEST(tf_exec, SingleGraphOn2Threads) {
     thread0.join();
     thread1.join();
   }
+  UnsetEnvVariable("NGRAPH_TF_NOT_PERSISTENT");
 }
 
 TEST(tf_exec, model) {

From b8ad7b64ea1e30e4fef1bd73a8837041377e0d2a Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Wed, 11 Dec 2019 10:23:03 -0800
Subject: [PATCH 15/18] Remove temp tests

---
 test/tf_exec.cpp | 73 ------------------------------------------------
 1 file changed, 73 deletions(-)

diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp
index 8e3dd5162..843699710 100644
--- a/test/tf_exec.cpp
+++ b/test/tf_exec.cpp
@@ -148,36 +148,6 @@ TEST(tf_exec, SingleGraphOn2Threads) {
   UnsetEnvVariable("NGRAPH_TF_NOT_PERSISTENT");
 }
 
-TEST(tf_exec, model) {
-  string graph_name = "test_axpy.pbtxt";
-
-  unique_ptr<Session> session;
-  ASSERT_OK(CreateSession(graph_name, "CPU", session));
-
-  string inp_tensor_name_0{"x"};
-  string inp_tensor_name_1{"y"};
-  string out_tensor_name{"add"};
-  std::vector<Tensor> out_tensor_vals;
-
-  Tensor inp_tensor_val(tensorflow::DT_FLOAT, tensorflow::TensorShape({2, 3}));
-  Tensor out_tensor_expected_val(tensorflow::DT_FLOAT,
-                                 tensorflow::TensorShape({2, 3}));
-  for (int i = 0; i < 10; i++) {
-    vector<float> in_vals(6, float(i));
-    AssignInputValues<float>(inp_tensor_val, in_vals);
-
-    vector<float> out_vals(6, 6.0 * float(i));
-    AssignInputValues<float>(out_tensor_expected_val, out_vals);
-
-    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
-        {inp_tensor_name_0, inp_tensor_val},
-        {inp_tensor_name_1, inp_tensor_val}};
-
-    ASSERT_OK(session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals));
-    Compare(out_tensor_vals, {out_tensor_expected_val});
-  }
-}
-
 TEST(tf_exec, hello_world) {
   Scope root = Scope::NewRootScope();
 
@@ -248,49 +218,6 @@ TEST(tf_exec, axpy) {
   }
 }
 
-TEST(tf_exec, axpyIntermediate) {
-  GraphDef gdef;
-  auto status = ReadTextProto(Env::Default(), "test_axpy.pbtxt", &gdef);
-  ASSERT_TRUE(status == Status::OK()) << "Can't read protobuf graph";
-
-  SessionOptions options;
-  ConfigProto& config = options.config;
-  config.set_allow_soft_placement(true);
-  std::unique_ptr<Session> session(NewSession(options));
-
-  ASSERT_OK(session->Create(gdef));
-
-  // Create the inputs for this graph
-  Tensor x(DT_FLOAT, TensorShape({2, 3}));
-  auto x_flat = x.flat<float>();
-  for (int i = 0; i < x_flat.size(); i++) {
-    x_flat.data()[i] = 1.0;
-  }
-
-  Tensor y(DT_FLOAT, TensorShape({2, 3}));
-  auto y_flat = y.flat<float>();
-  for (int i = 0; i < y_flat.size(); i++) {
-    y_flat.data()[i] = 1.0;
-  }
-
-  std::vector<Tensor> outputs;
-
-  ASSERT_OK(session->Run({{"x", x}, {"y", y}}, {"mul"}, {}, &outputs));
-
-  ASSERT_EQ(outputs.size(), 1);
-  auto mat1 = outputs[0].matrix<float>();
-  EXPECT_FLOAT_EQ(5.0, mat1(0, 0));
-  EXPECT_FLOAT_EQ(5.0, mat1(1, 0));
-
-  for (auto output : outputs) {
-    auto output_flat = output.flat<float>();
-    for (int i = 0; i < x_flat.size(); i++) {
-      cout << output_flat.data()[i] << " ";
-    }
-    cout << endl;
-  }
-}
-
 TEST(tf_exec, DISABLED_BatchMatMul_0D) {
   Scope root = Scope::NewRootScope();
   auto dev_scope = root.WithDevice("/device:NGRAPH:0");

From f394d2e2c86281ddee6e0aed1823ef11d188566f Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Wed, 11 Dec 2019 10:25:18 -0800
Subject: [PATCH 16/18] Changing flag name

---
 ngraph_bridge/ngraph_encapsulate_op.cc | 2 +-
 test/tf_exec.cpp                       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 589581e9b..ffdc5b33b 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -209,7 +209,7 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx)
                  << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not")
                  << " create tensors";
 
-  const char* not_persistent_flag = std::getenv("NGRAPH_TF_NOT_PERSISTENT");
+  const char* not_persistent_flag = std::getenv("NGRAPH_TF_DISABLE_PERSISTENT");
   m_use_persistent = (not_persistent_flag == nullptr);
 
   event.Stop();
diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp
index 843699710..65b1d7ccd 100644
--- a/test/tf_exec.cpp
+++ b/test/tf_exec.cpp
@@ -104,7 +104,7 @@ Status CreateSession(const string& graph_filename, const string& backend_name,
 // TODO: see how persistenttensors might fit in with this kind of multithreading
 // (symmetric parallel)
 TEST(tf_exec, SingleGraphOn2Threads) {
-  SetEnvVariable("NGRAPH_TF_NOT_PERSISTENT", "1");
+  SetEnvVariable("NGRAPH_TF_DISABLE_PERSISTENT", "1");
   string graph_name = "test_axpy.pbtxt";
   vector<string> backends{"CPU", "INTERPRETER"};
   for (auto be : backends) {
@@ -145,7 +145,7 @@ TEST(tf_exec, SingleGraphOn2Threads) {
     thread0.join();
     thread1.join();
   }
-  UnsetEnvVariable("NGRAPH_TF_NOT_PERSISTENT");
+  UnsetEnvVariable("NGRAPH_TF_DISABLE_PERSISTENT");
 }
 
 TEST(tf_exec, hello_world) {

From 8e3ad6008f54de769e6ca6f5556b01e629e45953 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Wed, 11 Dec 2019 10:27:20 -0800
Subject: [PATCH 17/18] Minor

---
 ngraph_bridge/ngraph_encapsulate_impl.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.h b/ngraph_bridge/ngraph_encapsulate_impl.h
index 024aa1901..2eb9c6935 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.h
+++ b/ngraph_bridge/ngraph_encapsulate_impl.h
@@ -264,7 +264,6 @@ class NGraphEncapsulateImpl {
 
   // each executable (which comes from a new shape) corresponds to a vector of
   // output tensors
-  // TODO: give better name
   // TODO: Should the vector store PersistentTensor or PersistentTensor* ?
   std::unordered_map<std::shared_ptr<ngraph::runtime::Executable>,
                      std::vector<tensorflow::PersistentTensor>>

From a6eef4b542b8e8bccf59e6c00a39b731e0e5dff1 Mon Sep 17 00:00:00 2001
From: Sayantan Sarkar <sayantan.sarkar@intel.com>
Date: Wed, 11 Dec 2019 16:46:33 -0800
Subject: [PATCH 18/18] Change function name according to review comments

---
 ngraph_bridge/ngraph_encapsulate_impl.cc | 2 +-
 ngraph_bridge/ngraph_encapsulate_impl.h  | 2 +-
 ngraph_bridge/ngraph_encapsulate_op.cc   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index 3e0ccad81..0931be42f 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -598,7 +598,7 @@ bool NGraphEncapsulateImpl::PersistentOutputsExist(
   return m_out_persistents.find(exec) != m_out_persistents.end();
 }
 
-Status NGraphEncapsulateImpl::RegisterOutputPersistentTensors(
+Status NGraphEncapsulateImpl::RegisterPersistentOutputTensors(
     std::shared_ptr<ngraph::runtime::Executable> exec,
     std::vector<tensorflow::PersistentTensor> persistent_tensors) {
   auto itr = m_out_persistents.find(exec);
diff --git a/ngraph_bridge/ngraph_encapsulate_impl.h b/ngraph_bridge/ngraph_encapsulate_impl.h
index 2eb9c6935..d5fbe0e09 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.h
+++ b/ngraph_bridge/ngraph_encapsulate_impl.h
@@ -182,7 +182,7 @@ class NGraphEncapsulateImpl {
 
   bool PersistentOutputsExist(std::shared_ptr<ngraph::runtime::Executable>);
 
-  Status RegisterOutputPersistentTensors(
+  Status RegisterPersistentOutputTensors(
       std::shared_ptr<ngraph::runtime::Executable>,
       std::vector<tensorflow::PersistentTensor>);
 
diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index ffdc5b33b..2759319a7 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -412,7 +412,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
       tf_output_tensors.push_back(output_tensor);
     }
     if (m_use_persistent && !present_in_cache) {
-      OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors(
+      OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterPersistentOutputTensors(
                               ng_exec, cached_persistent_output_tensors));
     }
     OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors(