From 230f11e983c80502dbc794bcd9803d1bacabdd56 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Tue, 3 Dec 2019 19:07:18 -0800 Subject: [PATCH 01/18] Use backend to create tensor --- ngraph_bridge/ngraph_encapsulate_impl.cc | 9 +++++---- ngraph_bridge/ngraph_encapsulate_op.cc | 8 ++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index ab9a779c1..6d999422d 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -432,17 +432,18 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( // values. ie, it will not reuse the same space if its rewritten it bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr; bool no_ng_tensor_found = last_ng_tensor == nullptr; - bool is_cpu = m_op_backend_name == "CPU"; + bool is_cpu_or_nnpi = + (m_op_backend_name == "CPU") || (m_op_backend_name == "NNPI"); // We need to check last_ng_tensor != nullptr, since there are cases where // at the first call to the ng_exec, both current_dst_ptr (when the // output is a 0-sized tensor) and last_dst_ptr (uninitialized at the // first call) are nullptr // A new tensor needs to be created for sure if no_ng_tensor_found - // Additionally for CPU, it needs to be created if tf_tensor_has_changed, + // Additionally for CPU/NNPI, it needs to be created if tf_tensor_has_changed, // for others, we do not create bool need_new_tensor_creation; - if (is_cpu) { + if (is_cpu_or_nnpi) { need_new_tensor_creation = no_ng_tensor_found || tf_tensor_has_changed; } else { need_new_tensor_creation = no_ng_tensor_found; @@ -464,7 +465,7 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( current_ng_tensor = tensor_from_pipeline; } else { if (need_new_tensor_creation) { - if (is_cpu) { + if (is_cpu_or_nnpi) { current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape, current_tf_ptr); } else { diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 1bd7a7d7e..a5f57f70a 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -198,9 +198,13 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) BackendManager::SetConfig(ng_encap_impl.GetOpBackend(), additional_attribute_map); - ng_encap_impl.SetExecCanCreateTensor( + // For NNPI (even though executable can create tensor) use backend to create + // tensor + bool executable_create_tensor = + (backend_name != "NNPI") && BackendManager::GetBackend(ng_encap_impl.GetOpBackend()) - ->executable_can_create_tensors()); + ->executable_can_create_tensors(); + ng_encap_impl.SetExecCanCreateTensor(executable_create_tensor); NGRAPH_VLOG(5) << "Executable can " << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not") << " create tensors"; From 84693e4332f27b486b710ccd1d41841646bf0d8d Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Tue, 3 Dec 2019 19:27:17 -0800 Subject: [PATCH 02/18] Minor log --- ngraph_bridge/ngraph_encapsulate_impl.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index 6d999422d..50aa71649 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -466,6 +466,7 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( } else { if (need_new_tensor_creation) { if (is_cpu_or_nnpi) { + NGRAPH_VLOG(0) << "Backend creating tensor with tf pointer"; current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape, current_tf_ptr); } else { From de6b5bd29f2c1fd69270bd3a41ee3adb14b105b3 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Tue, 3 Dec 2019 19:47:46 -0800 Subject: [PATCH 03/18] Fix backend detection --- ngraph_bridge/ngraph_encapsulate_impl.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index 50aa71649..c0b0bc4f0 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -432,8 +432,9 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( // values. ie, it will not reuse the same space if its rewritten it bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr; bool no_ng_tensor_found = last_ng_tensor == nullptr; - bool is_cpu_or_nnpi = - (m_op_backend_name == "CPU") || (m_op_backend_name == "NNPI"); + // m_op_backend_name might be BE:0, check if it starts with BE + bool is_cpu_or_nnpi = (m_op_backend_name.find("CPU") == 0) || + (m_op_backend_name.find("NNPI") == 0); // We need to check last_ng_tensor != nullptr, since there are cases where // at the first call to the ng_exec, both current_dst_ptr (when the @@ -466,7 +467,6 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( } else { if (need_new_tensor_creation) { if (is_cpu_or_nnpi) { - NGRAPH_VLOG(0) << "Backend creating tensor with tf pointer"; current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape, current_tf_ptr); } else { From 91cfc0aa2be809802c6a769f8311f4be6eca7fd3 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Wed, 4 Dec 2019 10:15:42 -0800 Subject: [PATCH 04/18] Make sure executable_can_create_tensors executes and is not optimized away --- ngraph_bridge/ngraph_encapsulate_op.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index a5f57f70a..e5ab0ead6 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -200,10 +200,12 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) // For NNPI (even though executable can create tensor) use backend to create // tensor + // Keep the executable_can_create_tensors check before the + // backend_name!="NNPI" bool executable_create_tensor = - (backend_name != "NNPI") && BackendManager::GetBackend(ng_encap_impl.GetOpBackend()) - ->executable_can_create_tensors(); + ->executable_can_create_tensors() && + (backend_name != "NNPI"); ng_encap_impl.SetExecCanCreateTensor(executable_create_tensor); NGRAPH_VLOG(5) << "Executable can " << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not") From 01aeb54508cd86ad65f05013e459a56ed10d04c4 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Thu, 5 Dec 2019 17:52:14 -0800 Subject: [PATCH 05/18] Prints and a test --- ngraph_bridge/ngraph_encapsulate_impl.cc | 5 +++- ngraph_bridge/ngraph_encapsulate_op.cc | 2 ++ test/tf_exec.cpp | 35 ++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index c0b0bc4f0..b1d0f0871 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -322,6 +322,7 @@ Status NGraphEncapsulateImpl::AllocateNGInputTensors( current_src_ptr, last_src_ptr, last_ng_tensor, false, ng_exec, op_backend, ng_element_type, ng_shape, m_executable_can_create_tensor ? inp_group_from_pipeline[i] : nullptr); + cout << "Using input tensor ptr: " << current_ng_tensor << "\n"; bool is_cpu = m_op_backend_name == "CPU"; if (!is_cpu && current_ng_tensor->get_stale()) { @@ -410,7 +411,7 @@ Status NGraphEncapsulateImpl::AllocateNGOutputTensors( current_dst_ptr, last_dst_ptr, last_ng_tensor, true, ng_exec, op_backend, ng_element_type, ng_shape, m_executable_can_create_tensor ? out_group_from_pipeline[i] : nullptr); - + cout << "Using output tensor ptr: " << current_ng_tensor << "\n"; current_ng_tensor->set_stale(true); output_caches[i] = std::make_pair(current_dst_ptr, current_ng_tensor); ng_outputs.push_back(current_ng_tensor); @@ -431,6 +432,7 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( // NOTE: we assume that TF's pointers WILL change if it actually changes // values. ie, it will not reuse the same space if its rewritten it bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr; + cout << "tf_tensor_has_changed: " << tf_tensor_has_changed << "\n"; bool no_ng_tensor_found = last_ng_tensor == nullptr; // m_op_backend_name might be BE:0, check if it starts with BE bool is_cpu_or_nnpi = (m_op_backend_name.find("CPU") == 0) || @@ -467,6 +469,7 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( } else { if (need_new_tensor_creation) { if (is_cpu_or_nnpi) { + cout << "Backend creating tensor with pointer: " << current_tf_ptr << "\n"; current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape, current_tf_ptr); } else { diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index e5ab0ead6..7b1976fa3 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -726,6 +726,8 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { ngraph::Event::write_trace(event_copy_output); ngraph::Event::write_trace(event); + cout << "\n"; + } // end compute int NGraphEncapsulateImpl::s_instance_count = 0; diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp index 9cea111de..043bca0a8 100644 --- a/test/tf_exec.cpp +++ b/test/tf_exec.cpp @@ -138,6 +138,41 @@ TEST(tf_exec, SingleGraphOn2Threads) { } } + +TEST(tf_exec, model) { + string graph_name = "test_axpy.pbtxt"; + + unique_ptr session; + ASSERT_OK(CreateSession(graph_name, "CPU", session)); + + string inp_tensor_name_0{"x"}; + string inp_tensor_name_1{"y"}; + string out_tensor_name{"add"}; + std::vector out_tensor_vals; + + Tensor inp_tensor_val(tensorflow::DT_FLOAT, + tensorflow::TensorShape({2, 3})); + Tensor out_tensor_expected_val(tensorflow::DT_FLOAT, + tensorflow::TensorShape({2, 3})); + for (int i = 0; i < 10; i++) { + + vector in_vals(6, float(i)); + AssignInputValues(inp_tensor_val, in_vals); + + vector out_vals(6, 6.0 * float(i)); + AssignInputValues(out_tensor_expected_val, out_vals); + + std::vector> inputs = { + {inp_tensor_name_0, inp_tensor_val}, + {inp_tensor_name_1, inp_tensor_val}}; + + ASSERT_OK( + session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals)); + Compare(out_tensor_vals, {out_tensor_expected_val}); + } +} + + TEST(tf_exec, hello_world) { Scope root = Scope::NewRootScope(); From acfd1654e6ad84b6a16934481b5ca06435e8b88d Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Fri, 6 Dec 2019 11:20:37 -0800 Subject: [PATCH 06/18] Some refactoring, Style and add back NG_TRACE for output --- ngraph_bridge/ngraph_encapsulate_impl.cc | 4 +- ngraph_bridge/ngraph_encapsulate_op.cc | 75 ++++++++++-------------- test/test_utilities.h | 1 + test/tf_exec.cpp | 55 ++++++++--------- 4 files changed, 58 insertions(+), 77 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index e6049dad3..d099600d7 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -454,7 +454,8 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( } else { if (need_new_tensor_creation) { if (is_cpu_or_nnpi) { - cout << "Backend creating tensor with pointer: " << current_tf_ptr << "\n"; + cout << "Backend creating tensor with pointer: " << current_tf_ptr + << "\n"; current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape, current_tf_ptr); } else { @@ -581,7 +582,6 @@ void NGraphEncapsulateImpl::DumpNgFunction( StringToFile(file_name, m_serialized_ng_function_map[ng_exec]); } - Status NGraphEncapsulateImpl::GetPersistentTFOutputTensor( std::shared_ptr exec, std::vector& tf_output_tensors) { diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 954f7295f..95eba94dd 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -209,7 +209,6 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not") << " create tensors"; - const char* not_persistent_flag = std::getenv("NGRAPH_TF_NOT_PERSISTENT"); m_use_persistent = (not_persistent_flag == nullptr); @@ -357,51 +356,18 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { vector> ng_outputs; std::vector tf_output_tensors; std::vector cached_persistent_output_tensors; + bool present_in_cache = false; - if (m_use_persistent) { - bool present_in_cache = false; - present_in_cache = ng_encap_impl.PersistentOutputsExist(ng_exec); - if (present_in_cache) { - ng_encap_impl.GetPersistentTFOutputTensor( - ng_exec, cached_persistent_output_tensors); - } - - for (auto i = 0; i < ng_exec->get_results().size(); i++) { - auto ng_element = ng_exec->get_results()[i]; - auto ng_shape = ng_element->get_shape(); - auto ng_element_type = ng_element->get_element_type(); - // Create the TF output tensor - vector dims; - for (auto dim : ng_shape) { - dims.push_back(dim); - } - TensorShape tf_shape(dims); - Tensor* output_tensor = nullptr; + { + NG_TRACE("NGTF_Output_Alloc", ""); + if (m_use_persistent) { + present_in_cache = ng_encap_impl.PersistentOutputsExist(ng_exec); if (present_in_cache) { - output_tensor = cached_persistent_output_tensors[i].AccessTensor(ctx); - } else { - // create a persistent tensor - PersistentTensor out_persistent; - OP_REQUIRES_OK(ctx, ctx->allocate_persistent( - ctx->expected_output_dtype(i), tf_shape, - &out_persistent, &output_tensor)); - cout << "ctx->expected_output_dtype(i): " - << ctx->expected_output_dtype(i) << "\n"; - cout << "Creation time: " << output_tensor->dtype() << "\n"; - cached_persistent_output_tensors.push_back(out_persistent); + ng_encap_impl.GetPersistentTFOutputTensor( + ng_exec, cached_persistent_output_tensors); } - tf_output_tensors.push_back(output_tensor); } - if (!present_in_cache){ - OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors( - ng_exec, cached_persistent_output_tensors)); - } - OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors( - tf_output_tensors, ng_exec, out_group_from_pipeline, - op_backend, ng_outputs)); - } else - { - NG_TRACE("NGTF_Output_Alloc", ""); + for (auto i = 0; i < ng_exec->get_results().size(); i++) { auto ng_element = ng_exec->get_results()[i]; auto ng_shape = ng_element->get_shape(); @@ -414,12 +380,13 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { } TensorShape tf_shape(dims); Tensor* output_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor)); - tf_output_tensors.push_back(output_tensor); // Make sure the nGraph-inferred element type agrees with what TensorFlow // expected. ng::element::Type expected_elem_type; + // TODO, we only need to do these checks once when the exec was + // created/compiled, not again and again + OP_REQUIRES_OK( ctx, TFDataTypeToNGraphElementType(ctx->expected_output_dtype(i), &expected_elem_type)); @@ -427,8 +394,26 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { ctx, ng_element_type == expected_elem_type, errors::Internal("Element type inferred by nGraph does not match " "the element type expected by TensorFlow")); - } + if (m_use_persistent) { + if (present_in_cache) { + output_tensor = cached_persistent_output_tensors[i].AccessTensor(ctx); + } else { + // create a persistent tensor + PersistentTensor out_persistent; + OP_REQUIRES_OK(ctx, ctx->allocate_persistent( + ctx->expected_output_dtype(i), tf_shape, + &out_persistent, &output_tensor)); + cout << "ctx->expected_output_dtype(i): " + << ctx->expected_output_dtype(i) << "\n"; + cout << "Creation time: " << output_tensor->dtype() << "\n"; + cached_persistent_output_tensors.push_back(out_persistent); + } + } else { + OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor)); + } + tf_output_tensors.push_back(output_tensor); + } OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors( tf_output_tensors, ng_exec, out_group_from_pipeline, op_backend, ng_outputs)); diff --git a/test/test_utilities.h b/test/test_utilities.h index 349ddf784..15a269ba9 100644 --- a/test/test_utilities.h +++ b/test/test_utilities.h @@ -147,6 +147,7 @@ void Compare(const Tensor& T1, const Tensor& T2, for (int k = 0; k < T_size; k++) { auto a = T1_data[k]; auto b = T2_data[k]; + cout << "a: " << a << ", b: " << b << "\n"; bool rt = Compare(a, b, rtol, atol); EXPECT_TRUE(rt) << " TF output " << a << endl << " NG output " << b; } diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp index 043bca0a8..6609302da 100644 --- a/test/tf_exec.cpp +++ b/test/tf_exec.cpp @@ -138,40 +138,35 @@ TEST(tf_exec, SingleGraphOn2Threads) { } } - TEST(tf_exec, model) { string graph_name = "test_axpy.pbtxt"; - unique_ptr session; - ASSERT_OK(CreateSession(graph_name, "CPU", session)); - - string inp_tensor_name_0{"x"}; - string inp_tensor_name_1{"y"}; - string out_tensor_name{"add"}; - std::vector out_tensor_vals; - - Tensor inp_tensor_val(tensorflow::DT_FLOAT, - tensorflow::TensorShape({2, 3})); - Tensor out_tensor_expected_val(tensorflow::DT_FLOAT, - tensorflow::TensorShape({2, 3})); - for (int i = 0; i < 10; i++) { - - vector in_vals(6, float(i)); - AssignInputValues(inp_tensor_val, in_vals); - - vector out_vals(6, 6.0 * float(i)); - AssignInputValues(out_tensor_expected_val, out_vals); - - std::vector> inputs = { - {inp_tensor_name_0, inp_tensor_val}, - {inp_tensor_name_1, inp_tensor_val}}; - - ASSERT_OK( - session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals)); - Compare(out_tensor_vals, {out_tensor_expected_val}); - } -} + unique_ptr session; + ASSERT_OK(CreateSession(graph_name, "CPU", session)); + + string inp_tensor_name_0{"x"}; + string inp_tensor_name_1{"y"}; + string out_tensor_name{"add"}; + std::vector out_tensor_vals; + Tensor inp_tensor_val(tensorflow::DT_FLOAT, tensorflow::TensorShape({2, 3})); + Tensor out_tensor_expected_val(tensorflow::DT_FLOAT, + tensorflow::TensorShape({2, 3})); + for (int i = 0; i < 10; i++) { + vector in_vals(6, float(i)); + AssignInputValues(inp_tensor_val, in_vals); + + vector out_vals(6, 6.0 * float(i)); + AssignInputValues(out_tensor_expected_val, out_vals); + + std::vector> inputs = { + {inp_tensor_name_0, inp_tensor_val}, + {inp_tensor_name_1, inp_tensor_val}}; + + ASSERT_OK(session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals)); + Compare(out_tensor_vals, {out_tensor_expected_val}); + } +} TEST(tf_exec, hello_world) { Scope root = Scope::NewRootScope(); From bd4e4f18949330ccaf677af361ed7b86e7e7d0b4 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Fri, 6 Dec 2019 11:32:23 -0800 Subject: [PATCH 07/18] Remove print from compare --- test/test_utilities.h | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_utilities.h b/test/test_utilities.h index 15a269ba9..349ddf784 100644 --- a/test/test_utilities.h +++ b/test/test_utilities.h @@ -147,7 +147,6 @@ void Compare(const Tensor& T1, const Tensor& T2, for (int k = 0; k < T_size; k++) { auto a = T1_data[k]; auto b = T2_data[k]; - cout << "a: " << a << ", b: " << b << "\n"; bool rt = Compare(a, b, rtol, atol); EXPECT_TRUE(rt) << " TF output " << a << endl << " NG output " << b; } From 6d8651ded0744cdfc8eae9602fa7974957eebefa Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Fri, 6 Dec 2019 11:52:23 -0800 Subject: [PATCH 08/18] Fix --- ngraph_bridge/ngraph_encapsulate_op.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 95eba94dd..41c268265 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -414,6 +414,10 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { } tf_output_tensors.push_back(output_tensor); } + if (!present_in_cache) { + OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors( + ng_exec, cached_persistent_output_tensors)); + } OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors( tf_output_tensors, ng_exec, out_group_from_pipeline, op_backend, ng_outputs)); From e1f5ca64455a682cf304a69ceb4ebb7854ae61e3 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Mon, 9 Dec 2019 00:00:18 -0800 Subject: [PATCH 09/18] Setting output tensor slightly differently --- ngraph_bridge/ngraph_encapsulate_op.cc | 17 +++++----- test/tf_exec.cpp | 43 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 41c268265..19e8a87d1 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -363,8 +363,8 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { if (m_use_persistent) { present_in_cache = ng_encap_impl.PersistentOutputsExist(ng_exec); if (present_in_cache) { - ng_encap_impl.GetPersistentTFOutputTensor( - ng_exec, cached_persistent_output_tensors); + OP_REQUIRES_OK(ctx, ng_encap_impl.GetPersistentTFOutputTensor( + ng_exec, cached_persistent_output_tensors)); } } @@ -657,14 +657,17 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { if (m_use_persistent) { for (int out_idx = 0; out_idx < ng_exec->get_results().size(); out_idx++) { + OP_REQUIRES_OK(ctx, ng_encap_impl.GetPersistentTFOutputTensor( + ng_exec, cached_persistent_output_tensors)); + auto out_tensor = + cached_persistent_output_tensors[out_idx].AccessTensor(ctx); + // TODO uncomment cout << "----\nSetting output " << out_idx << " --- " - << tf_output_tensors[out_idx]->dtype() << " " - << tf_output_tensors[out_idx]->NumElements() << "----\n"; + << out_tensor->dtype() << " " << out_tensor->NumElements() + << "----\n"; // ctx->set_output(out_idx, *tf_output_tensors[out_idx]); - ctx->set_output( - out_idx, - *(cached_persistent_output_tensors[out_idx].AccessTensor(ctx))); + ctx->set_output(out_idx, *out_tensor); } } diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp index 6609302da..fb8559e84 100644 --- a/test/tf_exec.cpp +++ b/test/tf_exec.cpp @@ -238,6 +238,49 @@ TEST(tf_exec, axpy) { } } +TEST(tf_exec, axpyIntermediate) { + GraphDef gdef; + auto status = ReadTextProto(Env::Default(), "test_axpy.pbtxt", &gdef); + ASSERT_TRUE(status == Status::OK()) << "Can't read protobuf graph"; + + SessionOptions options; + ConfigProto& config = options.config; + config.set_allow_soft_placement(true); + std::unique_ptr session(NewSession(options)); + + ASSERT_OK(session->Create(gdef)); + + // Create the inputs for this graph + Tensor x(DT_FLOAT, TensorShape({2, 3})); + auto x_flat = x.flat(); + for (int i = 0; i < x_flat.size(); i++) { + x_flat.data()[i] = 1.0; + } + + Tensor y(DT_FLOAT, TensorShape({2, 3})); + auto y_flat = y.flat(); + for (int i = 0; i < y_flat.size(); i++) { + y_flat.data()[i] = 1.0; + } + + std::vector outputs; + + ASSERT_OK(session->Run({{"x", x}, {"y", y}}, {"mul"}, {}, &outputs)); + + ASSERT_EQ(outputs.size(), 1); + auto mat1 = outputs[0].matrix(); + EXPECT_FLOAT_EQ(5.0, mat1(0, 0)); + EXPECT_FLOAT_EQ(5.0, mat1(1, 0)); + + for (auto output : outputs) { + auto output_flat = output.flat(); + for (int i = 0; i < x_flat.size(); i++) { + cout << output_flat.data()[i] << " "; + } + cout << endl; + } +} + TEST(tf_exec, DISABLED_BatchMatMul_0D) { Scope root = Scope::NewRootScope(); auto dev_scope = root.WithDevice("/device:NGRAPH:0"); From b25e4824da533e514c33d999ad124fd3d7a9b8af Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Mon, 9 Dec 2019 00:47:33 -0800 Subject: [PATCH 10/18] Fixing allocate_persistent call for cases with >1 output --- ngraph_bridge/ngraph_encapsulate_op.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 19e8a87d1..4d27d1b2d 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -355,7 +355,8 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { // Allocate tensors for the output results. vector> ng_outputs; std::vector tf_output_tensors; - std::vector cached_persistent_output_tensors; + std::vector cached_persistent_output_tensors( + ng_exec->get_results().size()); bool present_in_cache = false; { @@ -400,14 +401,10 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { output_tensor = cached_persistent_output_tensors[i].AccessTensor(ctx); } else { // create a persistent tensor - PersistentTensor out_persistent; - OP_REQUIRES_OK(ctx, ctx->allocate_persistent( - ctx->expected_output_dtype(i), tf_shape, - &out_persistent, &output_tensor)); - cout << "ctx->expected_output_dtype(i): " - << ctx->expected_output_dtype(i) << "\n"; - cout << "Creation time: " << output_tensor->dtype() << "\n"; - cached_persistent_output_tensors.push_back(out_persistent); + OP_REQUIRES_OK( + ctx, ctx->allocate_persistent( + ctx->expected_output_dtype(i), tf_shape, + &cached_persistent_output_tensors[i], &output_tensor)); } } else { OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &output_tensor)); From cf5b58c6f0ddf45cc9a0d6f711af391bf47195ad Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Mon, 9 Dec 2019 00:48:28 -0800 Subject: [PATCH 11/18] Cleanup --- ngraph_bridge/ngraph_encapsulate_op.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 4d27d1b2d..444a8c29b 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -658,17 +658,9 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { ng_exec, cached_persistent_output_tensors)); auto out_tensor = cached_persistent_output_tensors[out_idx].AccessTensor(ctx); - - // TODO uncomment - cout << "----\nSetting output " << out_idx << " --- " - << out_tensor->dtype() << " " << out_tensor->NumElements() - << "----\n"; - // ctx->set_output(out_idx, *tf_output_tensors[out_idx]); ctx->set_output(out_idx, *out_tensor); } } - - cout << "\n"; } // end compute int NGraphEncapsulateImpl::s_instance_count = 0; From 34049e1b07de366afcce7f7d22cbfa827560179f Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Mon, 9 Dec 2019 00:50:44 -0800 Subject: [PATCH 12/18] Cleanup --- ngraph_bridge/ngraph_encapsulate_impl.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index d099600d7..3e0ccad81 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -320,7 +320,6 @@ Status NGraphEncapsulateImpl::AllocateNGInputTensors( current_src_ptr, last_src_ptr, last_ng_tensor, false, ng_exec, op_backend, ng_element_type, ng_shape, m_executable_can_create_tensor ? inp_group_from_pipeline[i] : nullptr); - cout << "Using input tensor ptr: " << current_ng_tensor << "\n"; bool is_cpu = m_op_backend_name == "CPU"; if (!is_cpu && current_ng_tensor->get_stale()) { @@ -396,7 +395,6 @@ Status NGraphEncapsulateImpl::AllocateNGOutputTensors( current_dst_ptr, last_dst_ptr, last_ng_tensor, true, ng_exec, op_backend, ng_element_type, ng_shape, m_executable_can_create_tensor ? out_group_from_pipeline[i] : nullptr); - cout << "Using output tensor ptr: " << current_ng_tensor << "\n"; current_ng_tensor->set_stale(true); output_caches[i] = std::make_pair(current_dst_ptr, current_ng_tensor); ng_outputs.push_back(current_ng_tensor); @@ -417,7 +415,7 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( // NOTE: we assume that TF's pointers WILL change if it actually changes // values. ie, it will not reuse the same space if its rewritten it bool tf_tensor_has_changed = current_tf_ptr != last_tf_ptr; - cout << "tf_tensor_has_changed: " << tf_tensor_has_changed << "\n"; + NGRAPH_VLOG(5) << "tf_tensor_has_changed: " << tf_tensor_has_changed; bool no_ng_tensor_found = last_ng_tensor == nullptr; // m_op_backend_name might be BE:0, check if it starts with BE bool is_cpu_or_nnpi = (m_op_backend_name.find("CPU") == 0) || @@ -454,8 +452,8 @@ std::shared_ptr NGraphEncapsulateImpl::GetCurrentNgTensor( } else { if (need_new_tensor_creation) { if (is_cpu_or_nnpi) { - cout << "Backend creating tensor with pointer: " << current_tf_ptr - << "\n"; + NGRAPH_VLOG(5) << "Backend creating tensor with pointer: " + << current_tf_ptr; current_ng_tensor = op_backend->create_tensor(ng_element_type, ng_shape, current_tf_ptr); } else { @@ -609,7 +607,6 @@ Status NGraphEncapsulateImpl::RegisterOutputPersistentTensors( "Found an entry already exists in the cache for persistent tensors"); } m_out_persistents.emplace(exec, persistent_tensors); - cout << "m_out_persistents: " << m_out_persistents.size() << "\n"; return Status::OK(); } From 70dc3af5af91843eb8dba60f2c0eefca76d4bb80 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Mon, 9 Dec 2019 01:28:24 -0800 Subject: [PATCH 13/18] Fix for multithreaded case --- ngraph_bridge/ngraph_encapsulate_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 444a8c29b..589581e9b 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -411,7 +411,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { } tf_output_tensors.push_back(output_tensor); } - if (!present_in_cache) { + if (m_use_persistent && !present_in_cache) { OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors( ng_exec, cached_persistent_output_tensors)); } From e6b5431d285f9542aff6c8517a245a1ded496865 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Mon, 9 Dec 2019 02:21:54 -0800 Subject: [PATCH 14/18] Do not use persistenttensor for multithreaded case --- test/tf_exec.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp index fb8559e84..8e3dd5162 100644 --- a/test/tf_exec.cpp +++ b/test/tf_exec.cpp @@ -95,7 +95,16 @@ Status CreateSession(const string& graph_filename, const string& backend_name, return load_graph_status; } +// This test might fail when running with persistent output tensors +// Maybe because once we have computed outputs out to persistent tensors, +// the next thread comes in before Compare runs, and changes the values? +// For example, if we add a 1sec sleep right after entering Compute(), this test +// would pass (since we now allow enough time for compare to run before the next +// thread comes in and modifies the persistent tensor values) +// TODO: see how persistenttensors might fit in with this kind of multithreading +// (symmetric parallel) TEST(tf_exec, SingleGraphOn2Threads) { + SetEnvVariable("NGRAPH_TF_NOT_PERSISTENT", "1"); string graph_name = "test_axpy.pbtxt"; vector backends{"CPU", "INTERPRETER"}; for (auto be : backends) { @@ -136,6 +145,7 @@ TEST(tf_exec, SingleGraphOn2Threads) { thread0.join(); thread1.join(); } + UnsetEnvVariable("NGRAPH_TF_NOT_PERSISTENT"); } TEST(tf_exec, model) { From b8ad7b64ea1e30e4fef1bd73a8837041377e0d2a Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Wed, 11 Dec 2019 10:23:03 -0800 Subject: [PATCH 15/18] Remove temp tests --- test/tf_exec.cpp | 73 ------------------------------------------------ 1 file changed, 73 deletions(-) diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp index 8e3dd5162..843699710 100644 --- a/test/tf_exec.cpp +++ b/test/tf_exec.cpp @@ -148,36 +148,6 @@ TEST(tf_exec, SingleGraphOn2Threads) { UnsetEnvVariable("NGRAPH_TF_NOT_PERSISTENT"); } -TEST(tf_exec, model) { - string graph_name = "test_axpy.pbtxt"; - - unique_ptr session; - ASSERT_OK(CreateSession(graph_name, "CPU", session)); - - string inp_tensor_name_0{"x"}; - string inp_tensor_name_1{"y"}; - string out_tensor_name{"add"}; - std::vector out_tensor_vals; - - Tensor inp_tensor_val(tensorflow::DT_FLOAT, tensorflow::TensorShape({2, 3})); - Tensor out_tensor_expected_val(tensorflow::DT_FLOAT, - tensorflow::TensorShape({2, 3})); - for (int i = 0; i < 10; i++) { - vector in_vals(6, float(i)); - AssignInputValues(inp_tensor_val, in_vals); - - vector out_vals(6, 6.0 * float(i)); - AssignInputValues(out_tensor_expected_val, out_vals); - - std::vector> inputs = { - {inp_tensor_name_0, inp_tensor_val}, - {inp_tensor_name_1, inp_tensor_val}}; - - ASSERT_OK(session->Run(inputs, {out_tensor_name}, {}, &out_tensor_vals)); - Compare(out_tensor_vals, {out_tensor_expected_val}); - } -} - TEST(tf_exec, hello_world) { Scope root = Scope::NewRootScope(); @@ -248,49 +218,6 @@ TEST(tf_exec, axpy) { } } -TEST(tf_exec, axpyIntermediate) { - GraphDef gdef; - auto status = ReadTextProto(Env::Default(), "test_axpy.pbtxt", &gdef); - ASSERT_TRUE(status == Status::OK()) << "Can't read protobuf graph"; - - SessionOptions options; - ConfigProto& config = options.config; - config.set_allow_soft_placement(true); - std::unique_ptr session(NewSession(options)); - - ASSERT_OK(session->Create(gdef)); - - // Create the inputs for this graph - Tensor x(DT_FLOAT, TensorShape({2, 3})); - auto x_flat = x.flat(); - for (int i = 0; i < x_flat.size(); i++) { - x_flat.data()[i] = 1.0; - } - - Tensor y(DT_FLOAT, TensorShape({2, 3})); - auto y_flat = y.flat(); - for (int i = 0; i < y_flat.size(); i++) { - y_flat.data()[i] = 1.0; - } - - std::vector outputs; - - ASSERT_OK(session->Run({{"x", x}, {"y", y}}, {"mul"}, {}, &outputs)); - - ASSERT_EQ(outputs.size(), 1); - auto mat1 = outputs[0].matrix(); - EXPECT_FLOAT_EQ(5.0, mat1(0, 0)); - EXPECT_FLOAT_EQ(5.0, mat1(1, 0)); - - for (auto output : outputs) { - auto output_flat = output.flat(); - for (int i = 0; i < x_flat.size(); i++) { - cout << output_flat.data()[i] << " "; - } - cout << endl; - } -} - TEST(tf_exec, DISABLED_BatchMatMul_0D) { Scope root = Scope::NewRootScope(); auto dev_scope = root.WithDevice("/device:NGRAPH:0"); From f394d2e2c86281ddee6e0aed1823ef11d188566f Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Wed, 11 Dec 2019 10:25:18 -0800 Subject: [PATCH 16/18] Changing flag name --- ngraph_bridge/ngraph_encapsulate_op.cc | 2 +- test/tf_exec.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 589581e9b..ffdc5b33b 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -209,7 +209,7 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) << (ng_encap_impl.GetExecCanCreateTensor() ? "" : "not") << " create tensors"; - const char* not_persistent_flag = std::getenv("NGRAPH_TF_NOT_PERSISTENT"); + const char* not_persistent_flag = std::getenv("NGRAPH_TF_DISABLE_PERSISTENT"); m_use_persistent = (not_persistent_flag == nullptr); event.Stop(); diff --git a/test/tf_exec.cpp b/test/tf_exec.cpp index 843699710..65b1d7ccd 100644 --- a/test/tf_exec.cpp +++ b/test/tf_exec.cpp @@ -104,7 +104,7 @@ Status CreateSession(const string& graph_filename, const string& backend_name, // TODO: see how persistenttensors might fit in with this kind of multithreading // (symmetric parallel) TEST(tf_exec, SingleGraphOn2Threads) { - SetEnvVariable("NGRAPH_TF_NOT_PERSISTENT", "1"); + SetEnvVariable("NGRAPH_TF_DISABLE_PERSISTENT", "1"); string graph_name = "test_axpy.pbtxt"; vector backends{"CPU", "INTERPRETER"}; for (auto be : backends) { @@ -145,7 +145,7 @@ TEST(tf_exec, SingleGraphOn2Threads) { thread0.join(); thread1.join(); } - UnsetEnvVariable("NGRAPH_TF_NOT_PERSISTENT"); + UnsetEnvVariable("NGRAPH_TF_DISABLE_PERSISTENT"); } TEST(tf_exec, hello_world) { From 8e3ad6008f54de769e6ca6f5556b01e629e45953 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Wed, 11 Dec 2019 10:27:20 -0800 Subject: [PATCH 17/18] Minor --- ngraph_bridge/ngraph_encapsulate_impl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.h b/ngraph_bridge/ngraph_encapsulate_impl.h index 024aa1901..2eb9c6935 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.h +++ b/ngraph_bridge/ngraph_encapsulate_impl.h @@ -264,7 +264,6 @@ class NGraphEncapsulateImpl { // each executable (which comes from a new shape) corresponds to a vector of // output tensors - // TODO: give better name // TODO: Should the vector store PersistentTensor or PersistentTensor* ? std::unordered_map, std::vector> From a6eef4b542b8e8bccf59e6c00a39b731e0e5dff1 Mon Sep 17 00:00:00 2001 From: Sayantan Sarkar Date: Wed, 11 Dec 2019 16:46:33 -0800 Subject: [PATCH 18/18] Change function name according to review comments --- ngraph_bridge/ngraph_encapsulate_impl.cc | 2 +- ngraph_bridge/ngraph_encapsulate_impl.h | 2 +- ngraph_bridge/ngraph_encapsulate_op.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index 3e0ccad81..0931be42f 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -598,7 +598,7 @@ bool NGraphEncapsulateImpl::PersistentOutputsExist( return m_out_persistents.find(exec) != m_out_persistents.end(); } -Status NGraphEncapsulateImpl::RegisterOutputPersistentTensors( +Status NGraphEncapsulateImpl::RegisterPersistentOutputTensors( std::shared_ptr exec, std::vector persistent_tensors) { auto itr = m_out_persistents.find(exec); diff --git a/ngraph_bridge/ngraph_encapsulate_impl.h b/ngraph_bridge/ngraph_encapsulate_impl.h index 2eb9c6935..d5fbe0e09 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.h +++ b/ngraph_bridge/ngraph_encapsulate_impl.h @@ -182,7 +182,7 @@ class NGraphEncapsulateImpl { bool PersistentOutputsExist(std::shared_ptr); - Status RegisterOutputPersistentTensors( + Status RegisterPersistentOutputTensors( std::shared_ptr, std::vector); diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index ffdc5b33b..2759319a7 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -412,7 +412,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { tf_output_tensors.push_back(output_tensor); } if (m_use_persistent && !present_in_cache) { - OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterOutputPersistentTensors( + OP_REQUIRES_OK(ctx, ng_encap_impl.RegisterPersistentOutputTensors( ng_exec, cached_persistent_output_tensors)); } OP_REQUIRES_OK(ctx, ng_encap_impl.AllocateNGOutputTensors(