From cafa3a4a18428c877c6e45386b4a3a3acec6be9e Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Wed, 18 Dec 2019 15:22:37 -0800
Subject: [PATCH 01/13] register backends

---
 build_ngtf.py                                   | 2 +-
 ngraph_bridge/ngraph_backend_manager.cc         | 2 ++
 test/graph_rewrites/op_by_op_capability_test.cc | 3 +--
 test/test_ngraph_exec.cpp                       | 2 ++
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/build_ngtf.py b/build_ngtf.py
index f6127829d..c7bfddd6c 100755
--- a/build_ngtf.py
+++ b/build_ngtf.py
@@ -53,7 +53,7 @@ def main():
     '''
 
     # Component versions
-    ngraph_version = "v0.27.1-rc.1"
+    ngraph_version = "silee2/control_export"
     tf_version = "v1.14.0"
 
     # Command line parser options
diff --git a/ngraph_bridge/ngraph_backend_manager.cc b/ngraph_bridge/ngraph_backend_manager.cc
index 5e02ad4cd..20b11511e 100644
--- a/ngraph_bridge/ngraph_backend_manager.cc
+++ b/ngraph_bridge/ngraph_backend_manager.cc
@@ -45,6 +45,8 @@ Status BackendManager::SetBackendName(const string& backend_name) {
 }
 
 Status BackendManager::CreateBackend(const string& backend_name) {
+  ngraph_register_cpu_backend();
+  ngraph_register_interpreter_backend();
   std::lock_guard<std::mutex> lock(BackendManager::ng_backend_map_mutex_);
   auto itr = BackendManager::ng_backend_map_.find(backend_name);
   // if backend does not exist create it
diff --git a/test/graph_rewrites/op_by_op_capability_test.cc b/test/graph_rewrites/op_by_op_capability_test.cc
index 1dd05756e..923104c9f 100644
--- a/test/graph_rewrites/op_by_op_capability_test.cc
+++ b/test/graph_rewrites/op_by_op_capability_test.cc
@@ -62,8 +62,7 @@ TEST(OpByOpCapability, Backend) {
   ASSERT_OK(BackendManager::GetCurrentlySetBackendName(&ng_backend_type));
 
   // Map with all the backends, and what the boolean is_supported should be
-  std::map<std::string, bool> backend_map{
-      {"CPU", true}, {"INTERPRETER", true}, {"NOP", false}};
+  std::map<std::string, bool> backend_map{{"CPU", true}, {"INTERPRETER", true}};
 
   // Tests three cases of (Backend, is_supported=true/false)
   // 1. CPU, true
diff --git a/test/test_ngraph_exec.cpp b/test/test_ngraph_exec.cpp
index f34f8a53e..7ece5add8 100644
--- a/test/test_ngraph_exec.cpp
+++ b/test/test_ngraph_exec.cpp
@@ -44,6 +44,8 @@ class NGraphExecTest : public ::testing::Test {
     GraphDef gdef;
     TF_RETURN_IF_ERROR(ReadTextProto(Env::Default(), graph_pbtxt_file, &gdef));
     GraphConstructorOptions opts;
+    ngraph_register_cpu_backend();
+    ngraph_register_interpreter_backend();
     // Set the allow_internal_ops to true so that graphs with node names such as
     // _arg_Placeholder_1_0_1_0_arg are allowed. These op names are generated
     // during the graph rewrite passes and considered internal

From 81587259b742e35b2ac8b71fb73800ac4eb583c8 Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Wed, 18 Dec 2019 16:43:36 -0800
Subject: [PATCH 02/13] fix non static build

---
 ngraph_bridge/CMakeLists.txt            |  7 +++++++
 ngraph_bridge/ngraph_backend_manager.cc |  3 +++
 ngraph_bridge/version.cc                |  8 ++++++++
 ngraph_bridge/version.h                 |  4 ++++
 test/test_ngraph_exec.cpp               | 10 ++++++++--
 5 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt
index 18d218dad..45382aca4 100644
--- a/ngraph_bridge/CMakeLists.txt
+++ b/ngraph_bridge/CMakeLists.txt
@@ -98,6 +98,13 @@ if(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
     add_definitions(-DNGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
 endif()
 
+if(NGRAPH_CPU_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_CPU_STATIC_LIB_ENABLE)
+endif()
+
+if(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
+endif()
 
 if(NGRAPH_TF_USE_GRAPPLER_OPTIMIZER)            
     list(REMOVE_ITEM SRC ngraph_rewrite_pass.cc)
diff --git a/ngraph_bridge/ngraph_backend_manager.cc b/ngraph_bridge/ngraph_backend_manager.cc
index 20b11511e..d18f770e7 100644
--- a/ngraph_bridge/ngraph_backend_manager.cc
+++ b/ngraph_bridge/ngraph_backend_manager.cc
@@ -45,8 +45,11 @@ Status BackendManager::SetBackendName(const string& backend_name) {
 }
 
 Status BackendManager::CreateBackend(const string& backend_name) {
+// Register backends for static linking
+#if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
   ngraph_register_cpu_backend();
   ngraph_register_interpreter_backend();
+#endif
   std::lock_guard<std::mutex> lock(BackendManager::ng_backend_map_mutex_);
   auto itr = BackendManager::ng_backend_map_.find(backend_name);
   // if backend does not exist create it
diff --git a/ngraph_bridge/version.cc b/ngraph_bridge/version.cc
index b47a3f206..2ae1b0694 100644
--- a/ngraph_bridge/version.cc
+++ b/ngraph_bridge/version.cc
@@ -73,6 +73,14 @@ bool ngraph_tf_are_variables_enabled() {
 #endif
 }
 
+bool ngraph_bridge_static_lib_enable() {
+#if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+  return true;
+#else
+  return false;
+#endif
+}
+
 bool ngraph_tf_is_distributed_enabled() {
 #if defined(NGRAPH_DISTRIBUTED)
   return true;
diff --git a/ngraph_bridge/version.h b/ngraph_bridge/version.h
index 40c5f06e2..acb0cd7f9 100644
--- a/ngraph_bridge/version.h
+++ b/ngraph_bridge/version.h
@@ -38,6 +38,10 @@ bool ngraph_tf_is_grappler_enabled();
 // --enable_variables_and_optimizers flag
 bool ngraph_tf_are_variables_enabled();
 
+// Returns true when nGraph-bridge is built with
+// ----use_ngraph_staticlibs flag
+bool ngraph_bridge_static_lib_enable();
+
 // Return true when nGraph-bridge is build with
 // --distributed_build flag
 bool ngraph_tf_is_distributed_enabled();
diff --git a/test/test_ngraph_exec.cpp b/test/test_ngraph_exec.cpp
index 7ece5add8..9d6025705 100644
--- a/test/test_ngraph_exec.cpp
+++ b/test/test_ngraph_exec.cpp
@@ -25,6 +25,7 @@
 #include "ngraph_bridge/ngraph_builder.h"
 #include "ngraph_bridge/ngraph_pipelined_tensors.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/version.h"
 
 #include "test/test_utilities.h"
 
@@ -44,8 +45,13 @@ class NGraphExecTest : public ::testing::Test {
     GraphDef gdef;
     TF_RETURN_IF_ERROR(ReadTextProto(Env::Default(), graph_pbtxt_file, &gdef));
     GraphConstructorOptions opts;
-    ngraph_register_cpu_backend();
-    ngraph_register_interpreter_backend();
+
+    // Register backends for static linking
+    if (ngraph_bridge_static_lib_enable()) {
+      ngraph_register_cpu_backend();
+      ngraph_register_interpreter_backend();
+    }
+
     // Set the allow_internal_ops to true so that graphs with node names such as
     // _arg_Placeholder_1_0_1_0_arg are allowed. These op names are generated
     // during the graph rewrite passes and considered internal

From d9922afeaaa96849e2ea9fd597e7b78732156d53 Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Wed, 18 Dec 2019 16:45:55 -0800
Subject: [PATCH 03/13] minor

---
 ngraph_bridge/CMakeLists.txt | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt
index 45382aca4..c1426e9cb 100644
--- a/ngraph_bridge/CMakeLists.txt
+++ b/ngraph_bridge/CMakeLists.txt
@@ -98,13 +98,10 @@ if(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
     add_definitions(-DNGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
 endif()
 
-if(NGRAPH_CPU_STATIC_LIB_ENABLE)
-    add_definitions(-DNGRAPH_CPU_STATIC_LIB_ENABLE)
+if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE)
 endif()
 
-if(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
-    add_definitions(-DNGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
-endif()
 
 if(NGRAPH_TF_USE_GRAPPLER_OPTIMIZER)            
     list(REMOVE_ITEM SRC ngraph_rewrite_pass.cc)

From 89fd3c70bdccebbef286f9aa39a90511d3023086 Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Thu, 19 Dec 2019 13:01:48 -0800
Subject: [PATCH 04/13] cmake changes

---
 CMakeLists.txt               |  3 +++
 build_ngtf.py                |  6 +++++-
 ngraph_bridge/CMakeLists.txt |  1 -
 test/CMakeLists.txt          |  4 ++++
 test/test_ngraph_exec.cpp    | 10 +++++-----
 5 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ef1b831aa..8b62725b0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -275,6 +275,9 @@ if (NOT USE_PRE_BUILT_NGRAPH)
 endif()
 set(NGRAPH_INSTALL_DIR ${NGRAPH_ARTIFACTS_DIR})
 
+if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+endif()
 
 if(OS_VERSION STREQUAL "\"centos\"")
     set(LIB "lib64")
diff --git a/build_ngtf.py b/build_ngtf.py
index c7bfddd6c..4961d2b6c 100755
--- a/build_ngtf.py
+++ b/build_ngtf.py
@@ -411,7 +411,11 @@ def main():
     ]
 
     if (arguments.use_ngraph_staticlibs):
-        ngraph_tf_cmake_flags.extend(["-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE=TRUE"])
+        ngraph_tf_cmake_flags.extend([
+            "-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE=" +
+            flag_string_map[arguments.use_ngraph_staticlibs]
+        ])
+
     if (arguments.debug_build):
         ngraph_tf_cmake_flags.extend(["-DCMAKE_BUILD_TYPE=Debug"])
 
diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt
index c1426e9cb..acd4426e0 100644
--- a/ngraph_bridge/CMakeLists.txt
+++ b/ngraph_bridge/CMakeLists.txt
@@ -102,7 +102,6 @@ if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
     add_definitions(-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE)
 endif()
 
-
 if(NGRAPH_TF_USE_GRAPPLER_OPTIMIZER)            
     list(REMOVE_ITEM SRC ngraph_rewrite_pass.cc)
     list(APPEND SRC grappler/ngraph_optimizer.cc)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7648216ed..0997afa9d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -137,6 +137,10 @@ if (APPLE)
     set(NGRAPH_TF_CXX11_ABI 0)
 endif()
 
+if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+endif()
+
 if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
     target_link_libraries(gtest_ngtf
         -Wl,--whole-archive
diff --git a/test/test_ngraph_exec.cpp b/test/test_ngraph_exec.cpp
index 9d6025705..c5ae2e2b0 100644
--- a/test/test_ngraph_exec.cpp
+++ b/test/test_ngraph_exec.cpp
@@ -46,11 +46,11 @@ class NGraphExecTest : public ::testing::Test {
     TF_RETURN_IF_ERROR(ReadTextProto(Env::Default(), graph_pbtxt_file, &gdef));
     GraphConstructorOptions opts;
 
-    // Register backends for static linking
-    if (ngraph_bridge_static_lib_enable()) {
-      ngraph_register_cpu_backend();
-      ngraph_register_interpreter_backend();
-    }
+// Register backends for static linking
+#if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+    ngraph_register_cpu_backend();
+    ngraph_register_interpreter_backend();
+#endif
 
     // Set the allow_internal_ops to true so that graphs with node names such as
     // _arg_Placeholder_1_0_1_0_arg are allowed. These op names are generated

From 34c80261a3a7a5b64bee718c6c28f540eefe3559 Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Thu, 19 Dec 2019 13:22:44 -0800
Subject: [PATCH 05/13] separate cpu and interpreter backend register

---
 build_ngtf.py                           | 5 +----
 ngraph_bridge/CMakeLists.txt            | 9 +++++++++
 ngraph_bridge/ngraph_backend_manager.cc | 5 ++++-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/build_ngtf.py b/build_ngtf.py
index 4961d2b6c..b267cdea5 100755
--- a/build_ngtf.py
+++ b/build_ngtf.py
@@ -411,10 +411,7 @@ def main():
     ]
 
     if (arguments.use_ngraph_staticlibs):
-        ngraph_tf_cmake_flags.extend([
-            "-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE=" +
-            flag_string_map[arguments.use_ngraph_staticlibs]
-        ])
+        ngraph_tf_cmake_flags.extend(["-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE=TRUE"])
 
     if (arguments.debug_build):
         ngraph_tf_cmake_flags.extend(["-DCMAKE_BUILD_TYPE=Debug"])
diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt
index acd4426e0..df5be0699 100644
--- a/ngraph_bridge/CMakeLists.txt
+++ b/ngraph_bridge/CMakeLists.txt
@@ -102,6 +102,15 @@ if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
     add_definitions(-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE)
 endif()
 
+if(NGRAPH_CPU_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_CPU_STATIC_LIB_ENABLE)
+endif()
+
+if(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
+endif()
+
+
 if(NGRAPH_TF_USE_GRAPPLER_OPTIMIZER)            
     list(REMOVE_ITEM SRC ngraph_rewrite_pass.cc)
     list(APPEND SRC grappler/ngraph_optimizer.cc)
diff --git a/ngraph_bridge/ngraph_backend_manager.cc b/ngraph_bridge/ngraph_backend_manager.cc
index d18f770e7..45bd45c2f 100644
--- a/ngraph_bridge/ngraph_backend_manager.cc
+++ b/ngraph_bridge/ngraph_backend_manager.cc
@@ -46,10 +46,13 @@ Status BackendManager::SetBackendName(const string& backend_name) {
 
 Status BackendManager::CreateBackend(const string& backend_name) {
 // Register backends for static linking
-#if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+#if defined(NGRAPH_CPU_STATIC_LIB_ENABLE)
   ngraph_register_cpu_backend();
+#endif
+#if defined(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
   ngraph_register_interpreter_backend();
 #endif
+
   std::lock_guard<std::mutex> lock(BackendManager::ng_backend_map_mutex_);
   auto itr = BackendManager::ng_backend_map_.find(backend_name);
   // if backend does not exist create it

From faa1343089125cd2518a6d0473c029dd8d99b3f9 Mon Sep 17 00:00:00 2001
From: Kanvi Khanna <kanvi.khanna@intel.com>
Date: Thu, 19 Dec 2019 13:35:05 -0800
Subject: [PATCH 06/13] fix bazel

---
 bazel/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bazel/BUILD b/bazel/BUILD
index 7028b6a95..4a2d7e88c 100644
--- a/bazel/BUILD
+++ b/bazel/BUILD
@@ -132,6 +132,7 @@ cc_library(
         "-Wno-strict-overflow",
         '-D NGRAPH_BRIDGE_STATIC_LIB_ENABLE',
         '-D NGRAPH_TF_USE_GRAPPLER_OPTIMIZER',
+        '-D NGRAPH_CPU_STATIC_LIB_ENABLE',
         "-I ngraph_bridge",
         "-I logging",
         "-I external/ngraph/src",

From 63da9ffb777ef8a1fff5297fac11593c85667196 Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Thu, 19 Dec 2019 14:08:11 -0800
Subject: [PATCH 07/13] fix cpp example static linking

---
 examples/cpp/inference/CMakeLists.txt             | 4 ++++
 examples/cpp/inference/infer_multiple_networks.cc | 5 +++++
 examples/cpp/inference/infer_single_network.cc    | 5 +++++
 ngraph_bridge/version.cc                          | 8 --------
 ngraph_bridge/version.h                           | 4 ----
 5 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/examples/cpp/inference/CMakeLists.txt b/examples/cpp/inference/CMakeLists.txt
index 1ef7cb816..4688c7be7 100644
--- a/examples/cpp/inference/CMakeLists.txt
+++ b/examples/cpp/inference/CMakeLists.txt
@@ -61,6 +61,10 @@ if (APPLE)
     add_definitions(-DTEST_SINGLE_INSTANCE)
 endif()
 
+if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+    add_definitions(-DNGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+endif()
+
 if(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
     target_link_libraries(${APP_NAME} 
         -Wl,--whole-archive
diff --git a/examples/cpp/inference/infer_multiple_networks.cc b/examples/cpp/inference/infer_multiple_networks.cc
index d5ea4b1bf..6606ca157 100644
--- a/examples/cpp/inference/infer_multiple_networks.cc
+++ b/examples/cpp/inference/infer_multiple_networks.cc
@@ -164,6 +164,11 @@ int main(int argc, char** argv) {
     return -1;
   }
 
+// Register backends for static linking
+#if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+  ngraph_register_cpu_backend();
+#endif
+
   const char* backend = "CPU";
   if (SetNGraphBackend(backend) != tf::Status::OK()) {
     std::cout << "Error: Cannot set the backend: " << backend << std::endl;
diff --git a/examples/cpp/inference/infer_single_network.cc b/examples/cpp/inference/infer_single_network.cc
index 5627ecdf5..62f0f5d22 100644
--- a/examples/cpp/inference/infer_single_network.cc
+++ b/examples/cpp/inference/infer_single_network.cc
@@ -160,6 +160,11 @@ int main(int argc, char** argv) {
     return -1;
   }
 
+// Register backends for static linking
+#if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
+  ngraph_register_cpu_backend();
+#endif
+
   const char* backend = "CPU";
   if (SetNGraphBackend(backend) != tf::Status::OK()) {
     std::cout << "Error: Cannot set the backend: " << backend << std::endl;
diff --git a/ngraph_bridge/version.cc b/ngraph_bridge/version.cc
index 2ae1b0694..b47a3f206 100644
--- a/ngraph_bridge/version.cc
+++ b/ngraph_bridge/version.cc
@@ -73,14 +73,6 @@ bool ngraph_tf_are_variables_enabled() {
 #endif
 }
 
-bool ngraph_bridge_static_lib_enable() {
-#if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
-  return true;
-#else
-  return false;
-#endif
-}
-
 bool ngraph_tf_is_distributed_enabled() {
 #if defined(NGRAPH_DISTRIBUTED)
   return true;
diff --git a/ngraph_bridge/version.h b/ngraph_bridge/version.h
index acb0cd7f9..40c5f06e2 100644
--- a/ngraph_bridge/version.h
+++ b/ngraph_bridge/version.h
@@ -38,10 +38,6 @@ bool ngraph_tf_is_grappler_enabled();
 // --enable_variables_and_optimizers flag
 bool ngraph_tf_are_variables_enabled();
 
-// Returns true when nGraph-bridge is built with
-// ----use_ngraph_staticlibs flag
-bool ngraph_bridge_static_lib_enable();
-
 // Return true when nGraph-bridge is build with
 // --distributed_build flag
 bool ngraph_tf_is_distributed_enabled();

From 1f0c0753d66ae04c46c82df2d8d1426268767f48 Mon Sep 17 00:00:00 2001
From: Sindhura Kantamani <sindhura.kantamani@intel.com>
Date: Thu, 19 Dec 2019 15:44:24 -0800
Subject: [PATCH 08/13] Update build_ngtf.py

---
 build_ngtf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_ngtf.py b/build_ngtf.py
index b267cdea5..8960ad84a 100755
--- a/build_ngtf.py
+++ b/build_ngtf.py
@@ -53,7 +53,7 @@ def main():
     '''
 
     # Component versions
-    ngraph_version = "silee2/control_export"
+    ngraph_version = "v0.27.1-rc.1"
     tf_version = "v1.14.0"
 
     # Command line parser options

From 7704247212a9561801711e85bc62cbc2bd2f7a72 Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Thu, 19 Dec 2019 16:29:53 -0800
Subject: [PATCH 09/13] address comments

---
 examples/cpp/inference/infer_multiple_networks.cc | 2 +-
 examples/cpp/inference/infer_single_network.cc    | 2 +-
 test/test_ngraph_exec.cpp                         | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/cpp/inference/infer_multiple_networks.cc b/examples/cpp/inference/infer_multiple_networks.cc
index 6606ca157..48983bbde 100644
--- a/examples/cpp/inference/infer_multiple_networks.cc
+++ b/examples/cpp/inference/infer_multiple_networks.cc
@@ -164,7 +164,7 @@ int main(int argc, char** argv) {
     return -1;
   }
 
-// Register backends for static linking
+// Register cpu backend for static linking
 #if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
   ngraph_register_cpu_backend();
 #endif
diff --git a/examples/cpp/inference/infer_single_network.cc b/examples/cpp/inference/infer_single_network.cc
index 62f0f5d22..b689cf657 100644
--- a/examples/cpp/inference/infer_single_network.cc
+++ b/examples/cpp/inference/infer_single_network.cc
@@ -160,7 +160,7 @@ int main(int argc, char** argv) {
     return -1;
   }
 
-// Register backends for static linking
+// Register cpu backend for static linking
 #if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
   ngraph_register_cpu_backend();
 #endif
diff --git a/test/test_ngraph_exec.cpp b/test/test_ngraph_exec.cpp
index c5ae2e2b0..3bbec92b1 100644
--- a/test/test_ngraph_exec.cpp
+++ b/test/test_ngraph_exec.cpp
@@ -25,7 +25,6 @@
 #include "ngraph_bridge/ngraph_builder.h"
 #include "ngraph_bridge/ngraph_pipelined_tensors.h"
 #include "ngraph_bridge/ngraph_utils.h"
-#include "ngraph_bridge/version.h"
 
 #include "test/test_utilities.h"
 

From b267ba1f260ee8bf07ba9273d0a896823d816ced Mon Sep 17 00:00:00 2001
From: Shrestha Malik <shrestha.malik@intel.com>
Date: Fri, 20 Dec 2019 10:29:43 -0800
Subject: [PATCH 10/13] Shrestha/var in compute (#388)

- Enabled --var build to use parallel executor integrating weights-on-device and data pipelining
- moved ngraph_var files outside the var build
---
 bazel/BUILD                                   |   2 +
 ngraph_bridge/CMakeLists.txt                  |   2 +-
 .../enable_variable_ops/ngraph_assign_op.cc   |   4 +-
 .../ngraph_enter_in_catalog.cc                |  15 +-
 .../ngraph_rewrite_pass.cc                    |   8 ++
 .../ngraph_tracked_variable.cc                |   5 +-
 .../ngraph_variable_modifiers.cc              |   2 +-
 .../ngraph_variable_update_ng_tensor_op.cc    |   3 +-
 ngraph_bridge/ngraph_encapsulate_impl.cc      |   2 +-
 ngraph_bridge/ngraph_encapsulate_op.cc        |  79 +++++++----
 ngraph_bridge/ngraph_encapsulate_op_utils.cc  | 130 ++++++++++++++++--
 ngraph_bridge/ngraph_encapsulate_op_utils.h   |  38 ++++-
 .../ngraph_enter_prefetch_in_catalog.h        |   4 +-
 ngraph_bridge/ngraph_executor.cc              |   2 +-
 ngraph_bridge/ngraph_prefetch_dataset_op.cc   |  16 ++-
 ngraph_bridge/ngraph_tensor_manager.cc        |  46 ++++++-
 ngraph_bridge/ngraph_tensor_manager.h         |   2 +
 ngraph_bridge/ngraph_tracked_variable.cc      |   4 +-
 .../{enable_variable_ops => }/ngraph_var.cc   |   2 +-
 .../{enable_variable_ops => }/ngraph_var.h    |   0
 .../test_ng_var_update_ng_tensor.cc           |   2 +-
 test/python/test_flib.py                      |   1 +
 test/test_ng_var_update_ng_tensor_kernel.cc   |   2 +-
 tools/test_utils.py                           |   5 +-
 24 files changed, 304 insertions(+), 72 deletions(-)
 rename ngraph_bridge/{enable_variable_ops => }/ngraph_var.cc (98%)
 rename ngraph_bridge/{enable_variable_ops => }/ngraph_var.h (100%)

diff --git a/bazel/BUILD b/bazel/BUILD
index 7028b6a95..034ff0dec 100644
--- a/bazel/BUILD
+++ b/bazel/BUILD
@@ -48,6 +48,7 @@ cc_library(
         "ngraph_bridge/ngraph_tensor_manager.h",
         "ngraph_bridge/ngraph_timer.h",
         "ngraph_bridge/ngraph_utils.h",
+        "ngraph_bridge/ngraph_var.h",
         "ngraph_bridge/ngraph_version_utils.h",
         "ngraph_bridge/tf_deadness_analysis.h",
         "ngraph_bridge/tf_graphcycles.h",
@@ -92,6 +93,7 @@ cc_library(
         "ngraph_bridge/ngraph_tensor_manager.cc",
         "ngraph_bridge/ngraph_tracked_variable.cc",
         "ngraph_bridge/ngraph_utils.cc",
+        "ngraph_bridge/ngraph_var.cc",
         "ngraph_bridge/tf_deadness_analysis.cc",
         "ngraph_bridge/tf_graphcycles.cc",
         "ngraph_bridge/ops/ngraph_ops.cc",
diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt
index 18d218dad..eb104ae3b 100644
--- a/ngraph_bridge/CMakeLists.txt
+++ b/ngraph_bridge/CMakeLists.txt
@@ -57,6 +57,7 @@ set(SRC
    ngraph_rewrite_pass.cc
    ngraph_tensor_manager.cc
    ngraph_tracked_variable.cc
+   ngraph_var.cc
    ngraph_utils.cc
    tf_graphcycles.cc
    tf_deadness_analysis.cc
@@ -86,7 +87,6 @@ if(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
     list(APPEND SRC enable_variable_ops/ngraph_tracked_variable.cc)
 
     # new files
-    list(APPEND SRC enable_variable_ops/ngraph_var.cc)
     list(APPEND SRC enable_variable_ops/ngraph_assign_op.cc)
     list(APPEND SRC enable_variable_ops/ngraph_enter_in_catalog.cc)
     list(APPEND SRC enable_variable_ops/ngraph_remove_ngraphassigns.cc)
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc b/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc
index b9f041e8b..35099bbc7 100644
--- a/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc
+++ b/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc
@@ -25,11 +25,11 @@
 #include "ngraph/event_tracing.hpp"
 #include "ngraph/runtime/backend.hpp"
 
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/ngraph_catalog.h"
 #include "ngraph_bridge/ngraph_freshness_tracker.h"
 #include "ngraph_bridge/ngraph_timer.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 
 using namespace std;
 namespace ng = ngraph;
@@ -83,7 +83,7 @@ class NGraphAssignOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     std::ostringstream oss;
-    oss << "Execute: Assign_" << my_instance_id << ": " << name();
+    oss << "NGAssign::Compute::" << name();
     ngraph::Event event_compute(oss.str(), name(), "");
 
     NGRAPH_VLOG(4) << "NGraphAssign:: Compute called for: " << def().name()
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc b/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc
index a456ef6e8..c96a4932e 100644
--- a/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc
+++ b/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc
@@ -160,15 +160,12 @@ Status EnterInCatalog(Graph* graph, int graph_id) {
         }
       }
 
-      // are there indexes that need copy
-      if (op_index_to_copy.size() > 0) {
-        try {
-          NGraphCatalog::AddToEncapOutputCopyIndexesMap(graph_id, node->name(),
-                                                        op_index_to_copy);
-        } catch (const std::exception& exp) {
-          return errors::Internal(
-              "Caught exception while entering in catalog: ", exp.what(), "\n");
-        }
+      try {
+        NGraphCatalog::AddToEncapOutputCopyIndexesMap(graph_id, node->name(),
+                                                      op_index_to_copy);
+      } catch (const std::exception& exp) {
+        return errors::Internal("Caught exception while entering in catalog: ",
+                                exp.what(), "\n");
       }
 
     }  // end of node is type NGraphEncapsulate
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc b/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc
index b764713ab..ea97ff417 100644
--- a/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc
+++ b/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc
@@ -30,6 +30,7 @@
 #include "ngraph_bridge/ngraph_cluster_manager.h"
 #include "ngraph_bridge/ngraph_deassign_clusters.h"
 #include "ngraph_bridge/ngraph_encapsulate_clusters.h"
+#include "ngraph_bridge/ngraph_enter_prefetch_in_catalog.h"
 #include "ngraph_bridge/ngraph_mark_for_clustering.h"
 #include "ngraph_bridge/ngraph_rewrite_for_tracking.h"
 #include "ngraph_bridge/ngraph_utils.h"
@@ -255,6 +256,13 @@ class NGraphEncapsulationPass : public NGraphRewritePass {
                  "Graph with NGraphAssigns Optimized/Removed");
     }
 
+    // 8. Enter Prefetch in catalog then.
+    TF_RETURN_IF_ERROR(EnterPrefetchInCatalog(options.graph->get(), idx));
+    if (DumpCatalogedGraphs()) {
+      DumpGraphs(options, idx, "prefetch-cataloged",
+                 "Graph with Prefetched Inputs Entered in Catalog");
+    }
+
     return Status::OK();
   }
 
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc b/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc
index c034d13c7..8b5b81f68 100644
--- a/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc
+++ b/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc
@@ -23,11 +23,11 @@
 #include "ngraph/event_tracing.hpp"
 #include "ngraph/runtime/backend.hpp"
 
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/ngraph_backend_manager.h"
 #include "ngraph_bridge/ngraph_catalog.h"
 #include "ngraph_bridge/ngraph_freshness_tracker.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 
 using namespace std;
 namespace ng = ngraph;
@@ -119,7 +119,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) {
                  << " ,backend_name " << ng_backend_name_;
 
   std::ostringstream oss;
-  oss << "NGraphVariable: " << my_instance_id << ": " << name();
+  oss << "NGVariable::Compute::" << name();
   ngraph::Event event_compute(oss.str(), name(), "");
 
   bool log_copies = false;
@@ -250,6 +250,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) {
     ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes());
   }
   var->Unref();
+  event_compute.Stop();
   ngraph::Event::write_trace(event_compute);
 }
 
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc b/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc
index 5fc190bea..376a596a9 100644
--- a/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc
+++ b/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc
@@ -26,12 +26,12 @@
 
 #include "ngraph/runtime/backend.hpp"
 
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/ngraph_backend_manager.h"
 #include "ngraph_bridge/ngraph_catalog.h"
 #include "ngraph_bridge/ngraph_freshness_tracker.h"
 #include "ngraph_bridge/ngraph_timer.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 
 using namespace std;
 namespace ng = ngraph;
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc b/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc
index fdb432f79..8755f6f76 100644
--- a/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc
+++ b/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc
@@ -24,10 +24,10 @@
 
 #include "ngraph/event_tracing.hpp"
 
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.h"
 #include "ngraph_bridge/ngraph_timer.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 
 using namespace std;
 namespace ng = ngraph;
@@ -67,6 +67,7 @@ NGraphVariableUpdateNGTensorOp::~NGraphVariableUpdateNGTensorOp() {
 void NGraphVariableUpdateNGTensorOp::Compute(OpKernelContext* context) {
   std::ostringstream oss;
   // Start event tracing
+  oss << "NGVariableUpdateNGTensor::Compute::" << name();
   ngraph::Event event_compute(oss.str(), name(), "");
   bool log_copies = false;
   OP_REQUIRES_OK(context,
diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc
index 7823f0a7d..f2ddf1ecd 100644
--- a/ngraph_bridge/ngraph_encapsulate_impl.cc
+++ b/ngraph_bridge/ngraph_encapsulate_impl.cc
@@ -45,8 +45,8 @@
 #include "ngraph_bridge/ngraph_timer.h"
 #include "ngraph_bridge/ngraph_utils.h"
 
+#include "ngraph_bridge/ngraph_var.h"
 #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/ngraph_catalog.h"
 #endif
 
diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc
index 9a48d8c92..4605757ae 100644
--- a/ngraph_bridge/ngraph_encapsulate_op.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op.cc
@@ -49,9 +49,9 @@
 #include "ngraph_bridge/ngraph_prefetch_shared_data.h"
 #include "ngraph_bridge/ngraph_timer.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 
 #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/ngraph_catalog.h"
 #endif
 
@@ -88,13 +88,8 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx)
       ctx, backend != nullptr,
       errors::Internal("Cannot get the backend object for BE: ", be_name));
 
-// If we have the VARIABLE capture on then we can't use the
-// parallel executor until that support is added.
-#if !defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
+  // If backend executable can create tensors we use parallel executor
   m_use_parallel_executor = backend->executable_can_create_tensors();
-#else
-  m_use_parallel_executor = false;
-#endif
 
   // Override the switch for debugging/testing
   if (std::getenv("NGRAPH_TF_USE_LEGACY_EXECUTOR") != nullptr) {
@@ -402,7 +397,7 @@ NGraphEncapsulateOp::~NGraphEncapsulateOp() {
 // OpKernel::Compute
 //---------------------------------------------------------------------------
 void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) {
-  ngraph::Event event_compute("Compute", "", "");
+  ngraph::Event event_compute("NGEncap::Compute::" + name(), name(), "");
 
   if (m_use_parallel_executor) {
     NGRAPH_VLOG(1) << "NGraphEncapsulateOp::Compute: Using Parallel Executor";
@@ -459,6 +454,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) {
                                m_parallel_executor->GetTensorPipelineDepth()));
 
   // Get Tensor Manager and some error checking
+  ngraph::Event event_prepare_ng_tensors("Prepare NG In/Out Tensors", "", "");
   auto tensor_manager = m_parallel_executor->GetTensorManager();
   int num_of_inputs = tensor_manager->GetNumberOfInputs();
   int num_of_outputs = tensor_manager->GetNumberOfOutputs();
@@ -499,14 +495,18 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) {
   vector<shared_ptr<ng::runtime::Tensor>> ng_inputs(num_of_inputs);
   vector<shared_ptr<ng::runtime::Tensor>> ng_outputs(num_of_outputs);
 
-  // All inputs and outputs are pipelined.
-  // Of all these pipelined inputs some are prefetched
-  // TODO: Fit in variables
-  ng_inputs = get<1>(pipelined_io_tensors);
-  ng_outputs = get<2>(pipelined_io_tensors);
+  // Prepare NG Input Output Tensors
+  // Assemble Variable tensors and pipelined tensors to ng_input and ng_outputs
+  OP_REQUIRES_OK(ctx, GetIOTensorsReadyForExecution(
+                          ctx, tensor_manager, get<1>(pipelined_io_tensors),
+                          get<2>(pipelined_io_tensors), ng_inputs, ng_outputs));
+  event_prepare_ng_tensors.Stop();
+  ngraph::Event::write_trace(event_prepare_ng_tensors);
 
   // And execute
-  ngraph::Event event_execute_graph("Execute Graph", "", "");
+  ngraph::Event event_execute_graph(
+      "Execute Graph Pipeline Indx" + to_string(current_iter_pipeline_depth),
+      "", "");
 
   BackendManager::LockBackend(m_parallel_executor->GetOpBackendName());
   NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute call starting for cluster "
@@ -540,12 +540,14 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) {
   ngraph::Event::write_trace(event_execute_graph);
 
   // Now prepare the output
-  ngraph::Event event_copy_output_tensor("Copy Output Tensor", "", "");
+  // Allocate TF Tensors
+  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute Allocating TF Output Tensors "
+                 << m_parallel_executor->GetNgraphClusterId();
 
-  std::vector<std::unique_ptr<ngraph::Event>> output_copy_events;
+  ngraph::Event event_prepare_tf_output_tensors("Prepare TF Output Tensor", "",
+                                                "");
+  vector<Tensor*> tf_output_tensors;
   for (auto i = 0; i < ng_exec->get_results().size(); i++) {
-    std::unique_ptr<ngraph::Event> event_copy_prep(
-        new ngraph::Event("Copy Prep", "", ""));
     auto ng_element = ng_exec->get_results()[i];
     auto ng_shape = ng_element->get_shape();
     auto ng_element_type = ng_element->get_element_type();
@@ -558,7 +560,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) {
     TensorShape tf_shape(dims);
     Tensor* tf_output_tensor = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &tf_output_tensor));
-
+    tf_output_tensors.push_back(tf_output_tensor);
     // Make sure the nGraph-inferred element type agrees with what TensorFlow
     // expected.
     ng::element::Type expected_elem_type;
@@ -569,28 +571,45 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) {
         ctx, ng_element_type == expected_elem_type,
         errors::Internal("Element type inferred by nGraph does not match "
                          "the element type expected by TensorFlow"));
-    event_copy_prep->Stop();
-    output_copy_events.push_back(std::move(event_copy_prep));
+  }
 
-    // Now copy the nGraph Tensor to Host Tensor
-    std::unique_ptr<ngraph::Event> event_copy_d2h(
-        new ngraph::Event("Device to Host Copy", "", ""));
-    void* dst_ptr = DMAHelper::base(tf_output_tensor);
+  // Copy Tensors that are required
+  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute Read NG Output Tensors "
+                 << m_parallel_executor->GetNgraphClusterId();
 
-    ng_outputs[i]->read(
-        dst_ptr, ng_outputs[i]->get_element_count() * ng_element_type.size());
+  std::vector<std::unique_ptr<ngraph::Event>> output_copy_events;
+
+  auto output_indexes_to_be_copied =
+      tensor_manager->GetOutputIndexesThatNeedCopy();
+  for (auto output_index : output_indexes_to_be_copied) {
+    // Copy the nGraph Tensor to Host Tensor
+    std::unique_ptr<ngraph::Event> event_copy_d2h(new ngraph::Event(
+        "D2H_Output_" + std::to_string(output_index), "", ""));
+    void* dst_ptr = (void*)DMAHelper::base(tf_output_tensors[output_index]);
+    ng_outputs[output_index]->read(
+        dst_ptr, ng_outputs[output_index]->get_element_count() *
+                     ng_outputs[output_index]->get_element_type().size());
     event_copy_d2h->Stop();
     output_copy_events.push_back(std::move(event_copy_d2h));
   }
-
   for (auto& next : output_copy_events) {
     ngraph::Event::write_trace(*next.get());
   }
+  event_prepare_tf_output_tensors.Stop();
+  ngraph::Event::write_trace(event_prepare_tf_output_tensors);
 
-  event_copy_output_tensor.Stop();
-  ngraph::Event::write_trace(event_copy_output_tensor);
+  // Synch Var Output Tensors as required
+  NGRAPH_VLOG(4)
+      << "NGraphEncapsulateOp::Compute Sync NG Output Variable Tensors "
+      << m_parallel_executor->GetNgraphClusterId();
+  ngraph::Event event_update_ngvar_tensors("Update NGVar Tensors", "", "");
+  OP_REQUIRES_OK(ctx, SyncOutputVarTensors(ctx, tensor_manager));
+  event_update_ngvar_tensors.Stop();
+  ngraph::Event::write_trace(event_update_ngvar_tensors);
 
   // Now return them to the cache
+  NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Returning Tensors "
+                 << m_parallel_executor->GetNgraphClusterId();
   ngraph::Event event_return_tensor("Return Tensor", "", "");
   pipelined_tensor_store->return_tensors(current_iter_pipeline_depth);
 
diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc
index 51eca36de..d12494e45 100644
--- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc
+++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc
@@ -18,17 +18,22 @@
 #include "ngraph_bridge/ngraph_prefetch_shared_data.h"
 #include "ngraph_bridge/ngraph_utils.h"
 
+#include "ngraph_bridge/ngraph_var.h"
+
 using namespace std;
 
 namespace tensorflow {
 
 namespace ngraph_bridge {
 
+//---------------------------------------------------------------------------
+//  GetPipelinedIOTensorsReadyForExecution
+//---------------------------------------------------------------------------
 Status GetPipelinedIOTensorsReadyForExecution(
-    OpKernelContext* ctx, std::vector<Tensor>& tf_input_tensors,
-    shared_ptr<PipelinedTensorsStore>& pipelined_tensor_store,
-    shared_ptr<NGraphTensorManager>& tensor_manager,
-    std::tuple<int, PipelinedTensorVector, PipelinedTensorVector>&
+    OpKernelContext* ctx, const vector<Tensor>& tf_input_tensors,
+    const shared_ptr<PipelinedTensorsStore>& pipelined_tensor_store,
+    const shared_ptr<NGraphTensorManager>& tensor_manager,
+    tuple<int, PipelinedTensorVector, PipelinedTensorVector>&
         pipelined_io_tensors) {
   auto io_tensors = pipelined_tensor_store->get_tensors();
 
@@ -84,7 +89,7 @@ Status GetPipelinedIOTensorsReadyForExecution(
           tensor_manager->GetInputIndexesForPrefetchSharedObject());
 
       // Get the set of IO tensors for the next iteration
-      std::tuple<int, PipelinedTensorVector, PipelinedTensorVector>
+      tuple<int, PipelinedTensorVector, PipelinedTensorVector>
           io_tensors_next_iter;
       io_tensors_next_iter = pipelined_tensor_store->get_tensors();
 
@@ -154,18 +159,21 @@ Status GetPipelinedIOTensorsReadyForExecution(
 
   // Allocate the input/
   ngraph::Event event_copy_input_tensor("Copy Pipelined Input Tensors", "", "");
-
+  std::vector<std::unique_ptr<ngraph::Event>> input_write_events;
   if (!skip_tf2ng_copy) {
     // All pipelined inputs are copied
 
     for (auto i = 0; i < pipelined_input_indexes.size(); i++) {
       int tf_index = pipelined_input_indexes[i];
-
       ng::element::Type ng_element_type;
       TF_RETURN_IF_ERROR(TFDataTypeToNGraphElementType(
           tf_input_tensors[tf_index].dtype(), &ng_element_type));
       void* current_src_ptr =
           (void*)DMAHelper::base(&tf_input_tensors[tf_index]);
+
+      std::unique_ptr<ngraph::Event> event_copy_h2d(
+          new ngraph::Event("H2D_Input_" + std::to_string(tf_index), "", ""));
+
       try {
         ng_pipelined_inputs[i]->write(
             current_src_ptr, ng_pipelined_inputs[i]->get_element_count() *
@@ -176,6 +184,8 @@ Status GetPipelinedIOTensorsReadyForExecution(
       } catch (...) {
         return errors::Internal("Error copying TF tensor to device tensor");
       }
+      event_copy_h2d->Stop();
+      input_write_events.push_back(std::move(event_copy_h2d));
     }
   } else {
     // All pipelined inputs that are not prefetched are copied
@@ -199,19 +209,27 @@ Status GetPipelinedIOTensorsReadyForExecution(
           tf_input_tensors[tf_index].dtype(), &ng_element_type));
       void* current_src_ptr =
           (void*)DMAHelper::base(&tf_input_tensors[tf_index]);
+      unique_ptr<ngraph::Event> event_copy_h2d(
+          new ngraph::Event("H2D_Input_" + to_string(tf_index), "", ""));
       try {
         ng_pipelined_inputs[ng_index]->write(
             current_src_ptr,
             ng_pipelined_inputs[ng_index]->get_element_count() *
                 ng_element_type.size());
-      } catch (const std::exception& exp) {
+      } catch (const exception& exp) {
         return errors::Internal("Error copying TF tensor to device tensor: ",
                                 exp.what());
       } catch (...) {
         return errors::Internal("Error copying TF tensor to device tensor");
       }
+      event_copy_h2d->Stop();
+      input_write_events.push_back(move(event_copy_h2d));
     }
   }
+
+  for (auto& next : input_write_events) {
+    ngraph::Event::write_trace(*next.get());
+  }
   event_copy_input_tensor.Stop();
   ngraph::Event::write_trace(event_copy_input_tensor);
 
@@ -221,5 +239,101 @@ Status GetPipelinedIOTensorsReadyForExecution(
   return Status::OK();
 }
 
+//---------------------------------------------------------------------------
+//  GetTensorFromContext
+//---------------------------------------------------------------------------
+Status GetTensorFromContext(const OpKernelContext* ctx,
+                            const string& shared_name,
+                            shared_ptr<ng::runtime::Tensor>& ng_tensor) {
+  // Get shared name from tensor manager
+  NGraphVar* var;
+  TF_RETURN_IF_ERROR(ctx->resource_manager()->Lookup<NGraphVar>(
+      ctx->resource_manager()->default_container(), shared_name, &var));
+  ng_tensor = var->ng_tensor();
+  var->Unref();
+  return Status::OK();
+}
+
+//---------------------------------------------------------------------------
+//  GetIOTensorsReadyForExecution
+//---------------------------------------------------------------------------
+Status GetIOTensorsReadyForExecution(
+    OpKernelContext* ctx, const shared_ptr<NGraphTensorManager>& tensor_manager,
+    const PipelinedTensorVector& pipelined_in_tensors,
+    const PipelinedTensorVector& pipelined_out_tensors,
+    vector<shared_ptr<ng::runtime::Tensor>>& ng_inputs,
+    vector<shared_ptr<ng::runtime::Tensor>>& ng_outputs) {
+  // Get Variables that are inputs
+  auto var_input_indexes = tensor_manager->GetInputIndexesFedByVariables();
+  for (int input_index : var_input_indexes) {
+    string shared_name;
+    TF_RETURN_IF_ERROR(
+        tensor_manager->GetInputVariableSharedName(input_index, &shared_name));
+    TF_RETURN_IF_ERROR(
+        GetTensorFromContext(ctx, shared_name, ng_inputs[input_index]));
+  }
+
+  // Get Variables that are outputs
+  auto var_output_indexes =
+      tensor_manager->GetOutputIndexesAssigningVariables();
+  for (int output_index : var_output_indexes) {
+    string shared_name;
+    TF_RETURN_IF_ERROR(tensor_manager->GetOutputVariableSharedName(
+        output_index, &shared_name));
+    TF_RETURN_IF_ERROR(
+        GetTensorFromContext(ctx, shared_name, ng_outputs[output_index]));
+  }
+
+  // Fit Pipelined Input Tensors
+  auto pipelined_input_indexes = tensor_manager->GetPipelinedInputIndexes();
+  for (int i = 0; i < pipelined_input_indexes.size(); i++) {
+    int input_index = pipelined_input_indexes[i];
+    ng_inputs[input_index] = pipelined_in_tensors[i];
+  }
+
+  // Fit Pipelined Output Tensors
+  auto pipelined_output_indexes = tensor_manager->GetPipelinedOutputIndexes();
+  for (int i = 0; i < pipelined_output_indexes.size(); i++) {
+    int output_index = pipelined_output_indexes[i];
+    ng_outputs[output_index] = pipelined_out_tensors[i];
+  }
+
+  return Status::OK();
+}
+
+//---------------------------------------------------------------------------
+//  SyncOutputVarTensors
+//---------------------------------------------------------------------------
+Status SyncOutputVarTensors(
+    const OpKernelContext* ctx,
+    const shared_ptr<NGraphTensorManager>& tensor_manager) {
+  // Get Variables that are outputs
+  auto var_output_indexes =
+      tensor_manager->GetOutputIndexesAssigningVariables();
+  NGRAPH_VLOG(4) << "output indexes size " << var_output_indexes.size();
+
+  for (int output_index : var_output_indexes) {
+    bool copy_to_tf;
+    TF_RETURN_IF_ERROR(
+        tensor_manager->GetOutputVariableCopyToTF(output_index, &copy_to_tf));
+
+    if (copy_to_tf) {
+      NGRAPH_VLOG(4) << "Sync NG Output Variable Tensors " << output_index;
+      // Get shared name from tensor manager
+      string shared_name;
+      TF_RETURN_IF_ERROR(tensor_manager->GetOutputVariableSharedName(
+          output_index, &shared_name));
+      NGraphVar* var;
+      TF_RETURN_IF_ERROR(ctx->resource_manager()->Lookup<NGraphVar>(
+          ctx->resource_manager()->default_container(), shared_name, &var));
+      // update tensor
+      var->copy_ng_to_tf();
+      var->Unref();
+      NGRAPH_VLOG(4) << "Sync Completed " << output_index;
+    }
+  }
+  return Status::OK();
+}
+
 }  // namespace ngraph_bridge
 }  // namespace tensorflow
diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.h b/ngraph_bridge/ngraph_encapsulate_op_utils.h
index 7f48eb09c..1a6df4ede 100644
--- a/ngraph_bridge/ngraph_encapsulate_op_utils.h
+++ b/ngraph_bridge/ngraph_encapsulate_op_utils.h
@@ -46,12 +46,44 @@ namespace ngraph_bridge {
 //
 
 Status GetPipelinedIOTensorsReadyForExecution(
-    OpKernelContext* ctx, vector<Tensor>& tf_input_tensors,
-    shared_ptr<PipelinedTensorsStore>& pipelined_tensor_store,
-    shared_ptr<NGraphTensorManager>& tensor_manager,
+    OpKernelContext* ctx, const vector<Tensor>& tf_input_tensors,
+    const shared_ptr<PipelinedTensorsStore>& pipelined_tensor_store,
+    const shared_ptr<NGraphTensorManager>& tensor_manager,
     tuple<int, PipelinedTensorVector, PipelinedTensorVector>&
         pipelined_io_tensors);
 
+// Assembles the different types of input and output tensors
+// Variable tensors and pipelined tensors are put together in the right order
+// into ng_inputs and ng_outputs
+// 1. For input indexes that are fed by variables, get the variable tensors from
+// context
+// 2. For output indexes that are updating variables, get the variable tensors
+// from context
+//    This enable update-in-place
+// 3. For input and output indexes that are pipelined, get the respective tensor
+//
+Status GetIOTensorsReadyForExecution(
+    OpKernelContext* ctx, const shared_ptr<NGraphTensorManager>& tensor_manager,
+    const PipelinedTensorVector& pipelined_in_tensors,
+    const PipelinedTensorVector& pipelined_out_tensors,
+    vector<shared_ptr<ng::runtime::Tensor>>& ng_inputs,
+    vector<shared_ptr<ng::runtime::Tensor>>& ng_outputs);
+
+// Gets the Tensor from OpKernelContext's Container for the given shared_name
+Status GetTensorFromContext(const OpKernelContext* ctx,
+                            const string& shared_name,
+                            shared_ptr<ng::runtime::Tensor>& ng_tensor);
+
+// Encapsulate Op updates the NGVariable's device tensor in-place
+// ie. the NGVariable's backend tensor is updated
+// Some of these Variables may be required by the TF ops and they will use the
+// host tensor
+// These were marked as "copy-to-tf" True in the Rewrite Phase
+// We will update these tensors here
+Status SyncOutputVarTensors(
+    const OpKernelContext* ctx,
+    const shared_ptr<NGraphTensorManager>& tensor_manager);
+
 }  // namespace ngraph_bridge
 }  // namespace tensorflow
 
diff --git a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h
index d7ab8cc9c..534166aa1 100644
--- a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h
+++ b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  *******************************************************************************/
-#ifndef NGRAPH_TF_ENTER_IN_CATALOG_H_
-#define NGRAPH_TF_ENTER_IN_CATALOG_H_
+#ifndef NGRAPH_TF_ENTER_PREFETCH_IN_CATALOG_H_
+#define NGRAPH_TF_ENTER_PREFETCH_IN_CATALOG_H_
 #pragma once
 
 #include "tensorflow/core/graph/graph.h"
diff --git a/ngraph_bridge/ngraph_executor.cc b/ngraph_bridge/ngraph_executor.cc
index 37e1b8b40..7d4fe2c2a 100644
--- a/ngraph_bridge/ngraph_executor.cc
+++ b/ngraph_bridge/ngraph_executor.cc
@@ -43,9 +43,9 @@
 #include "ngraph_bridge/ngraph_mark_for_clustering.h"
 #include "ngraph_bridge/ngraph_timer.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 
 #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/ngraph_catalog.h"
 #endif
 
diff --git a/ngraph_bridge/ngraph_prefetch_dataset_op.cc b/ngraph_bridge/ngraph_prefetch_dataset_op.cc
index 18b946191..7c131bcce 100644
--- a/ngraph_bridge/ngraph_prefetch_dataset_op.cc
+++ b/ngraph_bridge/ngraph_prefetch_dataset_op.cc
@@ -415,14 +415,15 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase {
             ngraph_bridge::NGraphPrefetchSharedResouce::RESOURCE_NAME,
             &shared_data);
         if (s.ok()) {
-          ngraph::Event evt_dev_cp("Prf Dev Copy", "Copy", "");
           shared_data->SetBufferDepth(m_buffer_size);
 
           auto ng_input_tensor_bundle =
               shared_data->GetNextIOTensorBundleForDeviceTransfer();
           auto ng_prefetch_input_indexes_map =
               shared_data->GetPrefetchInputIndexesMap();
-
+          ngraph::Event evt_dev_cp(
+              "Prf Dev Copy: Pipe_Ind_" + to_string(ng_input_tensor_bundle.Id),
+              "Copy", "");
           int number_of_buffer_elements = buffer_element.value.size();
           if (number_of_buffer_elements !=
               ng_prefetch_input_indexes_map.size()) {
@@ -433,7 +434,8 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase {
                 "encap " +
                 to_string(ng_prefetch_input_indexes_map.size()));
           }
-
+          std::vector<std::unique_ptr<ngraph::Event>>
+              prefetch_input_write_events;
           // Write to these tensors
           for (auto itr : ng_prefetch_input_indexes_map) {
             int ng_index = itr.first;
@@ -445,6 +447,8 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase {
 
             void* current_src_ptr =
                 (void*)DMAHelper::base(&buffer_element.value[tf_index]);
+            std::unique_ptr<ngraph::Event> event_copy_h2d(new ngraph::Event(
+                "H2D_PrefetchInput_" + std::to_string(tf_index), "Copy", ""));
             try {
               NGRAPH_VLOG(2)
                   << "[PREFETCH] INPUT tensor being written by Prefetch: "
@@ -459,6 +463,12 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase {
               throw std::runtime_error(
                   "Error copying TF tensor to device tensor");
             }
+            event_copy_h2d->Stop();
+            prefetch_input_write_events.push_back(std::move(event_copy_h2d));
+          }
+
+          for (auto& next : prefetch_input_write_events) {
+            ngraph::Event::write_trace(*next.get());
           }
 
           // Now add them back to the other queue
diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc
index 518ae96fe..116c213ec 100644
--- a/ngraph_bridge/ngraph_tensor_manager.cc
+++ b/ngraph_bridge/ngraph_tensor_manager.cc
@@ -42,7 +42,6 @@ NGraphTensorManager::NGraphTensorManager(const string ng_encap_node_name,
 
 void NGraphTensorManager::Initialize() {
 #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS)
-
   // input variables book-keeping
   for (int index = 0; index < m_number_of_inputs; index++) {
     if (NGraphCatalog::ExistsInInputVariableSharedNameMap(
@@ -86,6 +85,17 @@ void NGraphTensorManager::Initialize() {
       m_output_indexes_that_need_copy.push_back(index);
     }
   }
+
+  // For graphs that were run through AOT
+  // Graph rewrite is not done, and there is no entry in catalog
+  // If there is no entry in catalog all outputs need to be copied
+  if (!NGraphCatalog::EncapOutputNeedsCopy(m_ng_encap_graph_id,
+                                           m_ng_encap_node_name)) {
+    m_output_indexes_that_need_copy.resize(m_number_of_outputs);
+    iota(begin(m_output_indexes_that_need_copy),
+         end(m_output_indexes_that_need_copy), 0);
+  }
+
 #else
   m_output_indexes_that_need_copy.resize(m_number_of_outputs);
   iota(begin(m_output_indexes_that_need_copy),
@@ -140,6 +150,40 @@ void NGraphTensorManager::Initialize() {
       FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes);
 }
 
+//---------------------------------------------------------------------------
+//  NGraphTensorManager::Print
+//---------------------------------------------------------------------------
+void NGraphTensorManager::Print() {
+  auto PrintVector = [](const vector<int>& input_vector, const string title) {
+    cout << title << endl;
+    cout << ng::join(input_vector) << endl;
+  };
+
+  cout << "** NGEncapsulate TensorManager:" << m_ng_encap_node_name << " **"
+       << endl;
+
+  cout << "** Variables Related **" << endl;
+  PrintVector(m_input_indexes_from_variables, "Input Indexes from Variables");
+  PrintVector(m_output_indexes_assigning_variable,
+              "Output Indexes Referring to Variables");
+  PrintVector(m_output_indexes_that_need_copy, "Output Indexes to be Read");
+
+  cout << "** Pipelined **" << endl;
+  PrintVector(m_pipelined_input_indexes, "Pipelined Input Indexes");
+  PrintVector(m_pipelined_output_indexes, "Pipelined Output Indexes");
+
+  cout << "** Prefetched **" << endl;
+  PrintVector(m_prefetched_input_indexes, "Prefetched Input Indexes");
+  PrintVector(m_pipelined_not_prefetched_input_indexes,
+              "Pipelined But Not Prefetched Input Indexes");
+
+  cout << "** Prefetched wrt pipelined indexes **" << endl;
+  PrintVector(m_pipelined_input_indexes_that_are_prefetched,
+              "Prefetched Input Indexes wrt Pipelined Inputs");
+  PrintVector(m_pipelined_input_indexes_that_are_not_prefetched,
+              "Not Prefetched Input Indexes wrt Pipelined Inputs");
+}
+
 //---------------------------------------------------------------------------
 //  NGraphTensorManager::~NGraphTensorManager
 //---------------------------------------------------------------------------
diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h
index 9143241fb..73f2ca9d4 100644
--- a/ngraph_bridge/ngraph_tensor_manager.h
+++ b/ngraph_bridge/ngraph_tensor_manager.h
@@ -109,6 +109,8 @@ class NGraphTensorManager {
   Status GetOutputVariableCopyToTF(const int& output_index,
                                    bool* output_var_copy_to_tf);
 
+  void Print();
+
  private:
   void Initialize();
   string m_ng_encap_node_name;
diff --git a/ngraph_bridge/ngraph_tracked_variable.cc b/ngraph_bridge/ngraph_tracked_variable.cc
index bf277b6c1..22b1e584e 100644
--- a/ngraph_bridge/ngraph_tracked_variable.cc
+++ b/ngraph_bridge/ngraph_tracked_variable.cc
@@ -60,7 +60,6 @@ class NGraphVar : public ResourceBase {
  private:
   mutex mu_;
   Tensor tensor_;
-
   ~NGraphVar() override {}
 };
 
@@ -108,7 +107,7 @@ NGraphVariableOp::~NGraphVariableOp() { tracker_->Unref(); }
 void NGraphVariableOp::Compute(OpKernelContext* ctx) {
   mutex_lock l(init_mu_);
   std::ostringstream oss;
-  oss << "NGraphVariable: " << my_instance_id << ": " << name();
+  oss << "NGVariable::Compute::" << name();
   ngraph::Event event_compute(oss.str(), name(), "");
 
   if (!initialized_) {
@@ -182,6 +181,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) {
     ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes());
   }
   var->Unref();
+  event_compute.Stop();
   ngraph::Event::write_trace(event_compute);
 }
 
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_var.cc b/ngraph_bridge/ngraph_var.cc
similarity index 98%
rename from ngraph_bridge/enable_variable_ops/ngraph_var.cc
rename to ngraph_bridge/ngraph_var.cc
index efab9e7c0..1fa6001bf 100644
--- a/ngraph_bridge/enable_variable_ops/ngraph_var.cc
+++ b/ngraph_bridge/ngraph_var.cc
@@ -24,10 +24,10 @@
 #include "ngraph/event_tracing.hpp"
 #include "ngraph/runtime/backend.hpp"
 
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/ngraph_backend_manager.h"
 #include "ngraph_bridge/ngraph_freshness_tracker.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 
 using namespace std;
 namespace ng = ngraph;
diff --git a/ngraph_bridge/enable_variable_ops/ngraph_var.h b/ngraph_bridge/ngraph_var.h
similarity index 100%
rename from ngraph_bridge/enable_variable_ops/ngraph_var.h
rename to ngraph_bridge/ngraph_var.h
diff --git a/test/graph_rewrites/test_ng_var_update_ng_tensor.cc b/test/graph_rewrites/test_ng_var_update_ng_tensor.cc
index 0af2c7a57..924c54266 100644
--- a/test/graph_rewrites/test_ng_var_update_ng_tensor.cc
+++ b/test/graph_rewrites/test_ng_var_update_ng_tensor.cc
@@ -23,10 +23,10 @@
 #include "tensorflow/core/platform/test.h"
 
 #include "logging/tf_graph_writer.h"
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.h"
 #include "ngraph_bridge/ngraph_rewrite_for_tracking.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 #include "test/test_utilities.h"
 
 namespace tensorflow {
diff --git a/test/python/test_flib.py b/test/python/test_flib.py
index 079e34449..f0c9b5b59 100644
--- a/test/python/test_flib.py
+++ b/test/python/test_flib.py
@@ -46,6 +46,7 @@ def test_flib_1(self):
 
             res1 = self.with_ngraph(sess_fn)
             res2 = self.without_ngraph(sess_fn)
+
             exp = [np.full((2, 3), 3.0), np.full((2, 3), 0.95257413)]
             # Note both run on Host (because NgraphEncapsulate can only run on host)
             assert np.isclose(res1, res2).all()
diff --git a/test/test_ng_var_update_ng_tensor_kernel.cc b/test/test_ng_var_update_ng_tensor_kernel.cc
index 51742fcc9..4612d156b 100644
--- a/test/test_ng_var_update_ng_tensor_kernel.cc
+++ b/test/test_ng_var_update_ng_tensor_kernel.cc
@@ -30,9 +30,9 @@
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
-#include "ngraph_bridge/enable_variable_ops/ngraph_var.h"
 #include "ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.h"
 #include "ngraph_bridge/ngraph_utils.h"
+#include "ngraph_bridge/ngraph_var.h"
 #include "test/test_utilities.h"
 #include "test/tf_fake_input.h"
 
diff --git a/tools/test_utils.py b/tools/test_utils.py
index 12f2ead29..e8f5752d3 100755
--- a/tools/test_utils.py
+++ b/tools/test_utils.py
@@ -108,7 +108,7 @@ def run_ngtf_pytests(venv_dir, build_dir):
     build_dir = os.path.abspath(build_dir)
     venv_dir = os.path.abspath(venv_dir)
     mnist_dir = os.path.abspath(build_dir + '/examples/mnist/')
-
+    axpy_dir = os.path.abspath(build_dir + '/examples/')
     test_dir = os.path.join(build_dir, "test")
     test_dir = os.path.join(test_dir, "python")
 
@@ -130,7 +130,8 @@ def run_ngtf_pytests(venv_dir, build_dir):
         build_dir) + " --ignore=" + build_dir + "/test/python/bfloat16"
     env = os.environ.copy()
     new_paths = venv_dir + '/bin/python3:' + os.path.abspath(
-        build_dir) + ":" + os.path.abspath(mnist_dir)
+        build_dir) + ":" + os.path.abspath(axpy_dir) + ":" + os.path.abspath(
+            mnist_dir)
     if 'PYTHONPATH' in env:
         env["PYTHONPATH"] = new_paths + ":" + env["PYTHONPATH"]
     else:

From da1adabfbd48e587d7cb1923e6233ad8ed740ac7 Mon Sep 17 00:00:00 2001
From: Sindhura Kantamani <sindhura.kantamani@intel.com>
Date: Fri, 20 Dec 2019 11:35:08 -0800
Subject: [PATCH 11/13] Update
 examples/cpp/inference/infer_multiple_networks.cc

Co-Authored-By: Shrestha Malik <shrestha.malik@intel.com>
---
 examples/cpp/inference/infer_multiple_networks.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/cpp/inference/infer_multiple_networks.cc b/examples/cpp/inference/infer_multiple_networks.cc
index 48983bbde..904a12801 100644
--- a/examples/cpp/inference/infer_multiple_networks.cc
+++ b/examples/cpp/inference/infer_multiple_networks.cc
@@ -165,6 +165,7 @@ int main(int argc, char** argv) {
   }
 
 // Register cpu backend for static linking
+// [TODO]: Revisit this to see if we can remove registering here and register only in BackendManager.
 #if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
   ngraph_register_cpu_backend();
 #endif

From 6706cbfc6ab43402d5b6dd0267db88ff6d5f7528 Mon Sep 17 00:00:00 2001
From: sindhu-nervana <sindhura.kantamani@intel.com>
Date: Fri, 20 Dec 2019 11:43:57 -0800
Subject: [PATCH 12/13] code format

---
 examples/cpp/inference/infer_multiple_networks.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/cpp/inference/infer_multiple_networks.cc b/examples/cpp/inference/infer_multiple_networks.cc
index 904a12801..8dc68c227 100644
--- a/examples/cpp/inference/infer_multiple_networks.cc
+++ b/examples/cpp/inference/infer_multiple_networks.cc
@@ -165,7 +165,8 @@ int main(int argc, char** argv) {
   }
 
 // Register cpu backend for static linking
-// [TODO]: Revisit this to see if we can remove registering here and register only in BackendManager.
+// [TODO]: Revisit this to see if we can remove registering here and register
+// only in BackendManager.
 #if defined(NGRAPH_BRIDGE_STATIC_LIB_ENABLE)
   ngraph_register_cpu_backend();
 #endif

From 367d3db4008d1cfe003c6341e1a2e0f96a3e5218 Mon Sep 17 00:00:00 2001
From: kanvi-nervana <kanvi.khanna@intel.com>
Date: Fri, 20 Dec 2019 13:27:28 -0800
Subject: [PATCH 13/13] Kanvi/Add asserts in some python tests (#398)

---
 test/python/test_sigmoid.py |  3 ++-
 test/python/test_sign.py    | 10 ++++++----
 test/python/test_softmax.py | 15 +++++++++------
 test/python/test_stack.py   |  3 ++-
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/test/python/test_sigmoid.py b/test/python/test_sigmoid.py
index c5f8c1470..91b90f016 100644
--- a/test/python/test_sigmoid.py
+++ b/test/python/test_sigmoid.py
@@ -52,4 +52,5 @@ def test_sigmoid(self):
                                             y: y_np,
                                             z: z_np
                                         })
-        np.allclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
+        assert np.allclose(
+            self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
diff --git a/test/python/test_sign.py b/test/python/test_sign.py
index 781674960..c53ba26bf 100644
--- a/test/python/test_sign.py
+++ b/test/python/test_sign.py
@@ -35,8 +35,9 @@ def test_sign_1d(self, test_input, expected):
         val = tf.placeholder(tf.float32, shape=(1,))
         out = tf.sign(val)
         sess_fn = lambda sess: sess.run((out,), feed_dict={val: (test_input,)})
-        np.allclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
-        np.allclose(self.with_ngraph(sess_fn), expected)
+        assert np.allclose(
+            self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
+        assert np.allclose(self.with_ngraph(sess_fn), expected)
 
     def test_sign_2d(self):
         test_input = ((1.5, -2.5, -3.5), (-4.5, 5.5, 0))
@@ -44,5 +45,6 @@ def test_sign_2d(self):
         val = tf.placeholder(tf.float32, shape=(2, 3))
         out = tf.sign(val)
         sess_fn = lambda sess: sess.run((out,), feed_dict={val: test_input})
-        np.allclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
-        np.allclose(self.with_ngraph(sess_fn), expected)
+        assert np.allclose(
+            self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
+        assert np.allclose(self.with_ngraph(sess_fn), expected)
diff --git a/test/python/test_softmax.py b/test/python/test_softmax.py
index 87fcb0d84..2a8cbae94 100644
--- a/test/python/test_softmax.py
+++ b/test/python/test_softmax.py
@@ -43,8 +43,9 @@ def test_softmax_2d(self):
         expected = a_np
         a = tf.nn.softmax(x)
         sess_fn = lambda sess: sess.run((a), feed_dict={x: x_np})
-        np.allclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
-        np.allclose(self.with_ngraph(sess_fn), expected)
+        assert np.allclose(
+            self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
+        assert np.allclose(self.with_ngraph(sess_fn), expected)
 
     def test_softmax_3d(self):
         x = tf.placeholder(tf.float32, shape=(2, 3, 2))
@@ -59,8 +60,9 @@ def test_softmax_3d(self):
         expected = a_np
         a = tf.nn.softmax(x)
         sess_fn = lambda sess: sess.run((a), feed_dict={x: x_np})
-        np.allclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
-        np.allclose(self.with_ngraph(sess_fn), expected)
+        assert np.allclose(
+            self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
+        assert np.allclose(self.with_ngraph(sess_fn), expected)
 
     def test_softmax_4d(self):
         x = tf.placeholder(tf.float32, shape=(2, 3, 2, 4))
@@ -75,5 +77,6 @@ def test_softmax_4d(self):
         expected = a_np
         a = tf.nn.softmax(x)
         sess_fn = lambda sess: sess.run((a), feed_dict={x: x_np})
-        np.allclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
-        np.allclose(self.with_ngraph(sess_fn), expected)
+        assert np.allclose(
+            self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
+        assert np.allclose(self.with_ngraph(sess_fn), expected)
diff --git a/test/python/test_stack.py b/test/python/test_stack.py
index ee5f0c8e5..b44ae4d34 100644
--- a/test/python/test_stack.py
+++ b/test/python/test_stack.py
@@ -50,4 +50,5 @@ def test_stack(self, shapes, axis):
         a = tf.stack(placeholders, axis)
         sess_fn = lambda sess: sess.run(
             [a], feed_dict={p: v for p, v in zip(placeholders, values)})
-        np.allclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))
+        assert np.allclose(
+            self.with_ngraph(sess_fn), self.without_ngraph(sess_fn))